├── .github
└── workflows
│ └── ci.yaml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── cli
├── README.md
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── crawljax
│ │ │ └── cli
│ │ │ ├── JarRunner.java
│ │ │ ├── LogUtil.java
│ │ │ └── ParameterInterpeter.java
│ └── resources
│ │ ├── jar-with-dependencies.xml
│ │ ├── logback.xml
│ │ └── project.version
│ └── test
│ └── java
│ └── com
│ └── crawljax
│ ├── cli
│ ├── JarRunnerTest.java
│ └── LogUtilTest.java
│ └── test
│ └── util
│ └── CaptureSystemStreams.java
├── core
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── crawljax
│ │ │ ├── browser
│ │ │ ├── EmbeddedBrowser.java
│ │ │ ├── WebDriverBackedEmbeddedBrowser.java
│ │ │ ├── WebDriverBrowserBuilder.java
│ │ │ └── package-info.java
│ │ │ ├── condition
│ │ │ ├── Condition.java
│ │ │ ├── ConditionType.java
│ │ │ ├── ConditionTypeChecker.java
│ │ │ ├── CountCondition.java
│ │ │ ├── JavaScriptCondition.java
│ │ │ ├── Logic.java
│ │ │ ├── NotRegexCondition.java
│ │ │ ├── NotUrlCondition.java
│ │ │ ├── NotVisibleCondition.java
│ │ │ ├── NotXPathCondition.java
│ │ │ ├── RegexCondition.java
│ │ │ ├── UrlCondition.java
│ │ │ ├── VisibleCondition.java
│ │ │ ├── XPathCondition.java
│ │ │ ├── browserwaiter
│ │ │ │ ├── ExpectedCondition.java
│ │ │ │ ├── ExpectedElementCondition.java
│ │ │ │ ├── ExpectedVisibleCondition.java
│ │ │ │ ├── WaitCondition.java
│ │ │ │ ├── WaitConditionChecker.java
│ │ │ │ └── package-info.java
│ │ │ ├── crawlcondition
│ │ │ │ ├── CrawlCondition.java
│ │ │ │ └── package-info.java
│ │ │ ├── eventablecondition
│ │ │ │ ├── EventableCondition.java
│ │ │ │ ├── EventableConditionChecker.java
│ │ │ │ └── package-info.java
│ │ │ └── invariant
│ │ │ │ ├── Invariant.java
│ │ │ │ └── package-info.java
│ │ │ ├── core
│ │ │ ├── CandidateCrawlAction.java
│ │ │ ├── CandidateElement.java
│ │ │ ├── CandidateElementExtractor.java
│ │ │ ├── CandidateElementManager.java
│ │ │ ├── CrawlController.java
│ │ │ ├── CrawlQueue.java
│ │ │ ├── CrawlSession.java
│ │ │ ├── CrawlSessionNotSetupYetException.java
│ │ │ ├── CrawlTaskConsumer.java
│ │ │ ├── Crawler.java
│ │ │ ├── CrawlerContext.java
│ │ │ ├── CrawlerLeftDomainException.java
│ │ │ ├── CrawljaxException.java
│ │ │ ├── CrawljaxRunner.java
│ │ │ ├── ExitNotifier.java
│ │ │ ├── ExtractorManager.java
│ │ │ ├── StateUnreachableException.java
│ │ │ ├── UnfiredCandidateActions.java
│ │ │ ├── configuration
│ │ │ │ ├── AcceptAllFramesChecker.java
│ │ │ │ ├── BrowserConfiguration.java
│ │ │ │ ├── ConfigurationHelper.java
│ │ │ │ ├── CrawlActionsBuilder.java
│ │ │ │ ├── CrawlElement.java
│ │ │ │ ├── CrawlRules.java
│ │ │ │ ├── CrawlScope.java
│ │ │ │ ├── CrawljaxConfiguration.java
│ │ │ │ ├── DefaultCrawlScope.java
│ │ │ │ ├── DefaultUnexpectedAlertHandler.java
│ │ │ │ ├── Form.java
│ │ │ │ ├── FormAction.java
│ │ │ │ ├── FormInputField.java
│ │ │ │ ├── IgnoreFrameChecker.java
│ │ │ │ ├── InputField.java
│ │ │ │ ├── InputSpecification.java
│ │ │ │ ├── PreCrawlConfiguration.java
│ │ │ │ ├── ProxyConfiguration.java
│ │ │ │ └── UnexpectedAlertHandler.java
│ │ │ ├── exception
│ │ │ │ ├── BrowserConnectionException.java
│ │ │ │ ├── CrawlPathToException.java
│ │ │ │ └── package-info.java
│ │ │ ├── plugin
│ │ │ │ ├── DomChangeNotifierPlugin.java
│ │ │ │ ├── GeneratesOutput.java
│ │ │ │ ├── HostInterface.java
│ │ │ │ ├── HostInterfaceImpl.java
│ │ │ │ ├── OnBrowserCreatedPlugin.java
│ │ │ │ ├── OnFireEventFailedPlugin.java
│ │ │ │ ├── OnInvariantViolationPlugin.java
│ │ │ │ ├── OnNewStatePlugin.java
│ │ │ │ ├── OnRevisitStatePlugin.java
│ │ │ │ ├── OnUrlLoadPlugin.java
│ │ │ │ ├── Plugin.java
│ │ │ │ ├── Plugins.java
│ │ │ │ ├── PostCrawlingPlugin.java
│ │ │ │ ├── PreCrawlingPlugin.java
│ │ │ │ ├── PreStateCrawlingPlugin.java
│ │ │ │ └── descriptor
│ │ │ │ │ ├── Parameter.java
│ │ │ │ │ ├── PluginDescriptor.java
│ │ │ │ │ └── jaxb
│ │ │ │ │ └── generated
│ │ │ │ │ ├── ObjectFactory.java
│ │ │ │ │ ├── OptionList.java
│ │ │ │ │ ├── Parameter.java
│ │ │ │ │ ├── ParameterList.java
│ │ │ │ │ ├── PluginDescriptor.java
│ │ │ │ │ └── VersionList.java
│ │ │ └── state
│ │ │ │ ├── CrawlPath.java
│ │ │ │ ├── DefaultStateVertexFactory.java
│ │ │ │ ├── Element.java
│ │ │ │ ├── Eventable.java
│ │ │ │ ├── Identification.java
│ │ │ │ ├── InMemoryStateFlowGraph.java
│ │ │ │ ├── StateFlowGraph.java
│ │ │ │ ├── StateMachine.java
│ │ │ │ ├── StateVertex.java
│ │ │ │ ├── StateVertexFactory.java
│ │ │ │ └── StateVertexImpl.java
│ │ │ ├── di
│ │ │ ├── ConfigurationModule.java
│ │ │ ├── CoreModule.java
│ │ │ └── CrawlSessionProvider.java
│ │ │ ├── forms
│ │ │ ├── FormHandler.java
│ │ │ ├── FormInput.java
│ │ │ ├── FormInputValueHelper.java
│ │ │ ├── InputValue.java
│ │ │ ├── RandomInputValueGenerator.java
│ │ │ └── package-info.java
│ │ │ ├── metrics
│ │ │ └── MetricsModule.java
│ │ │ ├── oraclecomparator
│ │ │ ├── AbstractComparator.java
│ │ │ ├── Comparator.java
│ │ │ ├── OracleComparator.java
│ │ │ ├── StateComparator.java
│ │ │ ├── comparators
│ │ │ │ ├── AttributeComparator.java
│ │ │ │ ├── DateComparator.java
│ │ │ │ ├── EditDistanceComparator.java
│ │ │ │ ├── PlainStructureComparator.java
│ │ │ │ ├── RegexComparator.java
│ │ │ │ ├── ScriptComparator.java
│ │ │ │ ├── SimpleComparator.java
│ │ │ │ ├── StyleComparator.java
│ │ │ │ ├── XPathExpressionComparator.java
│ │ │ │ └── package-info.java
│ │ │ └── package-info.java
│ │ │ └── util
│ │ │ ├── DOMComparer.java
│ │ │ ├── DomDifferenceListener.java
│ │ │ ├── DomHistoryElement.java
│ │ │ ├── DomUtils.java
│ │ │ ├── ElementResolver.java
│ │ │ ├── HtmlNamespace.java
│ │ │ ├── UrlUtils.java
│ │ │ ├── XMLObject.java
│ │ │ ├── XPathHelper.java
│ │ │ └── package-info.java
│ └── resources
│ │ └── com
│ │ └── crawljax
│ │ └── core
│ │ └── plugin
│ │ └── descriptor
│ │ └── jaxb
│ │ └── plugin-descriptor.xsd
│ └── test
│ ├── java
│ └── com
│ │ └── crawljax
│ │ ├── browser
│ │ ├── BrowserClosesDownloadPopUp.java
│ │ ├── BrowserProvider.java
│ │ ├── ChromeProxyConfig.java
│ │ ├── WebDriverBackedEmbeddedBrowserNoCrashTest.java
│ │ ├── WebDriverBackedEmbeddedBrowserTest.java
│ │ └── matchers
│ │ │ └── StateFlowGraphMatchers.java
│ │ ├── condition
│ │ ├── BrowserDoesntLeaveUrlTest.java
│ │ ├── ConditionTest.java
│ │ └── browserwaiter
│ │ │ └── WaitConditionTest.java
│ │ ├── core
│ │ ├── CandidateElementExtractorTest.java
│ │ ├── CandidateElementManagerTest.java
│ │ ├── CandidateElementTest.java
│ │ ├── CrawlControllerTest.java
│ │ ├── CrawlerStopTest.java
│ │ ├── CrawlerTest.java
│ │ ├── ExitNotifierTest.java
│ │ ├── IFrameTest.java
│ │ ├── NestedFramesTest.java
│ │ ├── PassBasicHttpAuthTest.java
│ │ ├── PopUpTest.java
│ │ ├── configuration
│ │ │ ├── CrawlActionsTest.java
│ │ │ ├── CrawlElementMatcher.java
│ │ │ ├── CrawljaxConfigurationBuilderTest.java
│ │ │ ├── DefaultCrawlScopeTest.java
│ │ │ ├── UnderXPathTest.java
│ │ │ └── XPathEscapeApostropheTest.java
│ │ ├── largetests
│ │ │ ├── LargeChromeTest.java
│ │ │ ├── LargeFirefoxTest.java
│ │ │ ├── LargeIETest.java
│ │ │ ├── LargePhantomJSTest.java
│ │ │ └── LargeTestBase.java
│ │ ├── plugin
│ │ │ ├── OnFireEventFailedPluginTest.java
│ │ │ ├── PluginsTest.java
│ │ │ └── PluginsWithCrawlerTest.java
│ │ └── state
│ │ │ ├── ElementTest.java
│ │ │ ├── EventableTest.java
│ │ │ ├── PostCrawlStateGraphChecker.java
│ │ │ ├── StateFlowGraphTest.java
│ │ │ ├── StateMachineTest.java
│ │ │ ├── StateVertexFactoryTest.java
│ │ │ ├── StateVertexTest.java
│ │ │ └── StatesContainElementsTest.java
│ │ ├── crawls
│ │ ├── CrawlConcurrently.java
│ │ ├── CrawlHiddenElementsTest.java
│ │ └── CrawlWithCustomScopeTest.java
│ │ ├── forms
│ │ ├── FormHandlerTest.java
│ │ └── RandomInputValueGeneratorTest.java
│ │ ├── oracle
│ │ └── OracleTest.java
│ │ ├── oraclecomparator
│ │ └── comparators
│ │ │ └── EditDistanceTest.java
│ │ ├── test
│ │ ├── BaseCrawler.java
│ │ ├── BrowserTest.java
│ │ ├── RunWithWebServer.java
│ │ ├── Utils.java
│ │ ├── WebServer.java
│ │ ├── WebServerTest.java
│ │ └── matchers
│ │ │ └── FileMatcher.java
│ │ └── util
│ │ ├── DOMComparerTest.java
│ │ ├── DomUtilsBrowserTest.java
│ │ ├── DomUtilsTest.java
│ │ ├── HtmlNamespaceTest.java
│ │ ├── UrlUtilsTest.java
│ │ ├── XMLObjectTest.java
│ │ ├── XPathHelperTest.java
│ │ └── XmlunitDifferenceTest.java
│ └── resources
│ ├── candidateElementExtractorTest
│ ├── domWithFourTypeDownloadLink.html
│ └── domWithOneExternalAndTwoInternal.html
│ ├── configuration
│ └── crawljax.properties
│ ├── demo-site
│ ├── conditions.html
│ ├── crawlConditions.html
│ ├── customInput.html
│ ├── forms.html
│ ├── home.html
│ ├── img
│ │ └── nav.gif
│ ├── index.html
│ ├── info.html
│ ├── invariants.html
│ ├── js
│ │ └── general.js
│ ├── lib
│ │ └── jquery-2.0.3.min.js
│ ├── oracleComparators.html
│ ├── papers.html
│ ├── plugins.html
│ ├── randomInput.html
│ ├── select.html
│ ├── style.css
│ └── waitConditions.html
│ ├── logback-test.xml
│ ├── realm.properties
│ ├── site
│ ├── concurrentcrawl1
│ │ ├── index.html
│ │ ├── page_a.html
│ │ ├── page_b.html
│ │ └── page_c.html
│ ├── concurrentcrawl2
│ │ ├── index.html
│ │ ├── page_x.html
│ │ ├── page_y.html
│ │ └── page_z.html
│ ├── crawlconditions
│ │ └── index.html
│ ├── crawler
│ │ ├── index.html
│ │ ├── payload_10.html
│ │ ├── payload_11.html
│ │ ├── payload_2.html
│ │ ├── payload_3.html
│ │ ├── payload_4.html
│ │ ├── payload_5.html
│ │ ├── payload_6.html
│ │ ├── payload_7.html
│ │ ├── payload_8.html
│ │ └── payload_9.html
│ ├── crawlscope
│ │ ├── in_scope.html
│ │ ├── in_scope_inner.html
│ │ ├── index.html
│ │ ├── out_of_scope.html
│ │ └── out_of_scope_inner.html
│ ├── download
│ │ ├── download.blob
│ │ └── download.html
│ ├── formhandler
│ │ └── index.html
│ ├── hidden-elements-site
│ │ ├── a.html
│ │ ├── b.html
│ │ └── index.html
│ ├── home.html
│ ├── iframe
│ │ ├── iframe.html
│ │ ├── iframe2.html
│ │ ├── index.html
│ │ ├── page0-0-0.html
│ │ ├── page0-0.html
│ │ ├── page0.html
│ │ └── subiframe.html
│ ├── index.html
│ ├── infinite.html
│ ├── js
│ │ └── general.js
│ ├── lib
│ │ └── jquery-2.0.3.min.js
│ ├── navigate_other_urls.html
│ ├── popup
│ │ └── index.html
│ ├── simple.html
│ ├── simplelink
│ │ └── simplelink.html
│ ├── testCrawlElementCondition.html
│ ├── testCrawlElements.html
│ ├── testCrawlconditions.html
│ ├── testInvariants.html
│ ├── testOracleComparators.html
│ ├── testWaitCondition.html
│ └── underxpath.html
│ └── util
│ ├── domtest.html
│ └── state-10.html
├── eclipse-formatter.xml
├── examples
├── pom.xml
└── src
│ └── main
│ ├── java
│ └── com
│ │ └── crawljax
│ │ └── examples
│ │ ├── AdvancedExample.java
│ │ ├── CrawlScopeExample.java
│ │ ├── InvariantExample.java
│ │ ├── MetricPluginExample.java
│ │ ├── PluginExample.java
│ │ └── SimplestExample.java
│ └── resources
│ └── logback.xml
├── plugins
├── README.md
├── crawloverview-plugin
│ ├── .gitignore
│ ├── .travis.yml
│ ├── README.md
│ ├── pom.xml
│ └── src
│ │ ├── main
│ │ ├── java
│ │ │ └── com
│ │ │ │ └── crawljax
│ │ │ │ └── plugins
│ │ │ │ └── crawloverview
│ │ │ │ ├── BeanToReadableMap.java
│ │ │ │ ├── CrawlOverview.java
│ │ │ │ ├── CrawlOverviewException.java
│ │ │ │ ├── ImageWriter.java
│ │ │ │ ├── OutPutModelCache.java
│ │ │ │ ├── OutputBuilder.java
│ │ │ │ ├── StateBuilder.java
│ │ │ │ ├── StateWriter.java
│ │ │ │ └── model
│ │ │ │ ├── CandidateElementPosition.java
│ │ │ │ ├── Edge.java
│ │ │ │ ├── OutPutModel.java
│ │ │ │ ├── Serializer.java
│ │ │ │ ├── State.java
│ │ │ │ ├── StateCounter.java
│ │ │ │ ├── StateStatistics.java
│ │ │ │ └── Statistics.java
│ │ └── resources
│ │ │ ├── config.html
│ │ │ ├── header.html
│ │ │ ├── index.html
│ │ │ ├── nav.html
│ │ │ ├── plugin-descriptor.xml
│ │ │ ├── skeleton
│ │ │ ├── css
│ │ │ │ ├── bootstrap-responsive.min.css
│ │ │ │ ├── bootstrap.min.css
│ │ │ │ ├── shCore.css
│ │ │ │ └── shCoreDefault.css
│ │ │ ├── img
│ │ │ │ ├── glyphicons-halflings-white.png
│ │ │ │ └── glyphicons-halflings.png
│ │ │ ├── js
│ │ │ │ └── graphbuilder.js
│ │ │ └── lib
│ │ │ │ ├── bootstrap.min.js
│ │ │ │ ├── jquery-2.0.3.min.js
│ │ │ │ ├── shBrushXml.js
│ │ │ │ ├── shCore.js
│ │ │ │ └── vivagraph.min.js
│ │ │ ├── state.html
│ │ │ ├── statistics.html
│ │ │ ├── urls.html
│ │ │ └── version.html
│ │ └── test
│ │ ├── java
│ │ └── com
│ │ │ └── crawljax
│ │ │ ├── matchers
│ │ │ └── IsValidJson.java
│ │ │ └── plugins
│ │ │ └── crawloverview
│ │ │ ├── BeanToReadableMapTest.java
│ │ │ ├── CandidateElementMatcher.java
│ │ │ ├── HoverTest.java
│ │ │ ├── ImageWriterTest.java
│ │ │ ├── OutputBuilderTest.java
│ │ │ ├── OverviewIntegrationTest.java
│ │ │ ├── RunHoverCrawl.java
│ │ │ ├── SimpleSiteCrawlTest.java
│ │ │ ├── TestBean.java
│ │ │ └── model
│ │ │ └── SerializeTest.java
│ │ └── resources
│ │ ├── hover-test-site
│ │ ├── a.html
│ │ ├── b.html
│ │ ├── c.html
│ │ └── index.html
│ │ ├── logback-test.xml
│ │ ├── sampleOutModel.json
│ │ └── screenshot.png
├── pom.xml
└── test-plugin
│ ├── pom.xml
│ └── src
│ └── main
│ ├── java
│ └── com
│ │ └── crawljax
│ │ └── plugins
│ │ └── testplugin
│ │ ├── Runner.java
│ │ └── TestPlugin.java
│ └── resources
│ └── plugin-descriptor.xml
├── pom.xml
└── test-utils
├── README.md
├── pom.xml
└── src
├── main
├── java
│ └── com
│ │ └── crawljax
│ │ ├── crawltests
│ │ ├── SimpleInputSiteCrawl.java
│ │ ├── SimpleJsSiteCrawl.java
│ │ ├── SimpleSiteCrawl.java
│ │ └── SimpleXpathCrawl.java
│ │ └── rules
│ │ └── TempDirInTargetFolder.java
└── resources
│ └── sites
│ ├── lib
│ └── jquery-2.0.3.min.js
│ ├── simple-input-site
│ ├── index.html
│ └── otherState.html
│ ├── simple-js-site
│ ├── index.html
│ ├── payload_10.html
│ ├── payload_11.html
│ ├── payload_2.html
│ ├── payload_3.html
│ ├── payload_4.html
│ ├── payload_5.html
│ ├── payload_6.html
│ ├── payload_7.html
│ ├── payload_8.html
│ └── payload_9.html
│ ├── simple-site
│ ├── a.html
│ ├── b.html
│ ├── c.html
│ └── index.html
│ └── simple-xpath-site
│ ├── a.html
│ ├── b.html
│ ├── index-iframe.html
│ └── index.html
└── test
└── java
└── com
└── crawljax
└── crawljax_plugins_plugin
└── SampleCrawlersTest.java
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | name: Java CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 |
9 | jobs:
10 | build:
11 | runs-on: ubuntu-latest
12 | strategy:
13 | matrix:
14 | java: [8, 11]
15 |
16 | steps:
17 | - uses: actions/checkout@v2
18 | - uses: actions/setup-java@v1
19 | with:
20 | java-version: ${{ matrix.java }}
21 | - uses: actions/cache@v2
22 | with:
23 | path: |
24 | ~/.m2/repository
25 | key: ${{ runner.os }}-gradle-${{ matrix.java }}-${{ hashFiles('**/pom.xml') }}
26 | restore-keys: |
27 | ${{ runner.os }}-gradle-${{ matrix.java }}-
28 | ${{ runner.os }}-gradle-
29 | - run: |
30 | mkdir geckodriver
31 | GECKODRIVER_VER="0.29.0"; wget -qO - https://github.com/mozilla/geckodriver/releases/download/v$GECKODRIVER_VER/geckodriver-v$GECKODRIVER_VER-linux64.tar.gz | tar xz -C geckodriver
32 | echo "$PWD/geckodriver" >> "$GITHUB_PATH" # `export PATH` would be lost when this step's shell exits
33 | - run: |
34 | cd core
35 | mvn install -DskipTests=true -Dmaven.javadoc.skip=true -Dgpg.skip=true -B -V
36 | mvn test -B -Pintegrationtests -Dtest.browser=FIREFOX
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Eclipse
2 | .classpath
3 | .project
4 | .settings/
5 |
6 | # Intellij
7 | .idea/
8 | *.iml
9 | *.iws
10 | *.ipr
11 |
12 | # Maven
13 | *.log
14 | target/
15 | release.properties
16 | pom.xml.releaseBackup
17 | git.properties
18 | output/
19 |
20 | # Mac
21 | .DS_Store
--------------------------------------------------------------------------------
/cli/README.md:
--------------------------------------------------------------------------------
1 | # Crawljax Command-line
2 | This is the Command-line distribution of Crawljax. The project is assembled in a ZIP file containing the jar that you can run to execute the crawler.
3 |
4 |
5 | Unzip the zip and in the resulting folder you can run Crawljax as follows:
6 |
7 | ```
8 | usage: java -jar crawljax-cli-version.jar theUrl theOutputDir
9 | -a,--crawlHiddenAnchors Crawl anchors even if they are not visible in the
10 | browser.
11 | -b,--browser browser type: firefox, ie, chrome, remote,
12 | htmlunit, android, iphone. Default is Firefox
13 | -click a comma separated list of HTML tags that should
14 | be clicked. Default is A and BUTTON
15 | -d,--depth crawl depth level. Default is 2
16 | -h,--help print this message
17 | -log Log to this file instead of the console
18 | -o,--override Override the output directory if non-empty
19 | -p,--parallel Number of browsers to use for crawling. Default
20 | is 1
21 | -s,--maxstates max number of states to crawl. Default is 0
22 | (unlimited)
23 | -t,--timeout Specify the maximum crawl time in minutes
24 | -v,--verbose Be extra verbose
25 | -version print the version information and exit
26 | -waitAfterEvent the time to wait after an event has been fired in
27 | milliseconds. Default is 500
28 | -waitAfterReload the time to wait after an URL has been loaded in
29 | milliseconds. Default is 500
30 | ```
31 |
32 | The output folder will contain the output of the Crawl overview plugin.
--------------------------------------------------------------------------------
/cli/src/main/java/com/crawljax/cli/LogUtil.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.cli;
2 |
3 | import org.slf4j.LoggerFactory;
4 |
5 | import ch.qos.logback.classic.Level;
6 | import ch.qos.logback.classic.Logger;
7 | import ch.qos.logback.classic.spi.ILoggingEvent;
8 | import ch.qos.logback.core.ConsoleAppender;
9 | import ch.qos.logback.core.FileAppender;
10 | import ch.qos.logback.core.encoder.Encoder;
11 |
12 | public class LogUtil {
13 |
14 | /**
15 | * Configure file logging and stop console logging.
16 | *
17 | * @param filename
18 | * Log to this file.
19 | */
20 | @SuppressWarnings("unchecked")
21 | static void logToFile(String filename) {
22 | Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME);
23 |
24 | FileAppender fileappender = new FileAppender<>();
25 | fileappender.setContext(rootLogger.getLoggerContext());
26 | fileappender.setFile(filename);
27 | fileappender.setName("FILE");
28 |
29 | ConsoleAppender> console = (ConsoleAppender>) rootLogger.getAppender("STDOUT");
30 | fileappender.setEncoder((Encoder) console.getEncoder());
31 |
32 | fileappender.start();
33 |
34 | rootLogger.addAppender(fileappender);
35 |
36 | console.stop();
37 | }
38 |
39 | /**
40 | * @param newLevel
41 | * for com.crawljax.*
42 | */
43 | static void setCrawljaxLogLevel(Level newLevel) {
44 | Logger rootLogger = (Logger) LoggerFactory.getLogger("com.crawljax");
45 | rootLogger.setLevel(newLevel);
46 | }
47 |
48 | private LogUtil() {
49 |
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/cli/src/main/resources/jar-with-dependencies.xml:
--------------------------------------------------------------------------------
1 |
5 | distribution
6 |
7 | zip
8 |
9 |
10 |
11 | ${project.basedir}/../
12 | /
13 |
14 | LICENSE
15 |
16 |
17 |
18 | ${project.basedir}/../
19 | /crawljax-doc
20 |
21 | CHANGELOG*
22 | README*
23 |
24 |
25 |
26 | ${project.basedir}
27 | /
28 |
29 | README*
30 |
31 |
32 |
33 | src/main/resources
34 |
35 |
36 | logback.xml
37 |
38 |
39 |
40 | target
41 |
42 |
43 | *.jar
44 |
45 |
46 |
47 |
48 |
49 | /lib
50 | false
51 | runtime
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/cli/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | %d{HH:mm:ss.SSS} [%thread] %-5level - %msg%n
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/cli/src/main/resources/project.version:
--------------------------------------------------------------------------------
1 | Version = ${project.version}
2 | Git commit id = ${git.commit.id}
3 | Git describe = ${git.commit.id.describe}
4 | Build at ${git.build.time}
--------------------------------------------------------------------------------
/cli/src/test/java/com/crawljax/test/util/CaptureSystemStreams.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.test.util;
2 |
3 | import java.io.ByteArrayOutputStream;
4 | import java.io.PrintStream;
5 |
6 | import org.junit.rules.ExternalResource;
7 |
8 | /**
9 |  * JUnit rule that swaps {@link System#out} and {@link System#err} for in-memory streams so a test
10 |  * can inspect what was printed. {@link #after()} puts the original streams back and closes the
11 |  * capturing ones.
12 |  */
13 | public class CaptureSystemStreams extends ExternalResource {
14 |
15 |     private final ByteArrayOutputStream captureOutStream = new ByteArrayOutputStream();
16 |     private final ByteArrayOutputStream captureErrorStream = new ByteArrayOutputStream();
17 |     private PrintStream originalOutStream;
18 |     private PrintStream originalErrorStream;
19 |
20 |     public CaptureSystemStreams() {
21 |     }
22 |
23 |     @Override
24 |     protected void before() throws Throwable {
25 |         // Remember the real streams first so after() can restore them.
26 |         originalOutStream = System.out;
27 |         originalErrorStream = System.err;
28 |         System.setOut(new PrintStream(captureOutStream));
29 |         System.setErr(new PrintStream(captureErrorStream));
30 |     }
31 |
32 |     @Override
33 |     protected void after() {
34 |         PrintStream capturingErr = System.err;
35 |         PrintStream capturingOut = System.out;
36 |         System.setErr(originalErrorStream);
37 |         System.setOut(originalOutStream);
38 |         capturingErr.close();
39 |         capturingOut.close();
40 |     }
41 |
42 |     /**
43 |      * @return everything written to the captured standard output stream so far.
44 |      */
45 |     public String getConsoleOutput() {
46 |         return captureOutStream.toString();
47 |     }
48 |
49 |     /**
50 |      * @return everything written to the captured standard error stream so far.
51 |      */
52 |     public String getErrorOutput() {
53 |         return captureErrorStream.toString();
54 |     }
55 | }
56 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/browser/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This is the core package for the browsers.
3 | */
4 | package com.crawljax.browser;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/Condition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition;
2 |
3 | import net.jcip.annotations.ThreadSafe;
4 |
5 | import com.crawljax.browser.EmbeddedBrowser;
6 |
7 | /**
8 | * A predicate that is evaluated against the current state of a browser.
9 | *
10 | * @author dannyroest@gmail.com (Danny Roest)
11 | */
12 | @ThreadSafe
13 | public interface Condition {
14 |
15 | /**
16 | * @param browser
17 | * the browser whose current state is checked.
18 | * @return whether the evaluated condition is satisfied
19 | */
20 | boolean check(EmbeddedBrowser browser);
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/NotRegexCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition;
2 |
3 | import net.jcip.annotations.Immutable;
4 |
5 | import java.util.Objects;
6 |
7 | import com.crawljax.browser.EmbeddedBrowser;
8 | import com.google.common.base.MoreObjects;
9 |
10 | /**
11 |  * Negated {@link RegexCondition}: true iff the expression does NOT occur in the DOM.
12 |  *
13 |  * @author dannyroest@gmail.com (Danny Roest)
14 |  */
15 | @Immutable
16 | public class NotRegexCondition implements Condition {
17 |
18 |     private final RegexCondition regexCondition;
19 |
20 |     /**
21 |      * @param expression
22 |      *            the regular expression that must not occur.
23 |      */
24 |     public NotRegexCondition(String expression) {
25 |         regexCondition = new RegexCondition(expression);
26 |     }
27 |
28 |     @Override
29 |     public boolean check(EmbeddedBrowser browser) {
30 |         // Delegates to the wrapped condition through Logic.not.
31 |         return Logic.not(regexCondition).check(browser);
32 |     }
33 |
34 |     @Override
35 |     public int hashCode() {
36 |         return Objects.hash(getClass(), regexCondition);
37 |     }
38 |
39 |     @Override
40 |     public boolean equals(Object object) {
41 |         if (!(object instanceof NotRegexCondition)) {
42 |             return false;
43 |         }
44 |         NotRegexCondition other = (NotRegexCondition) object;
45 |         return Objects.equals(regexCondition, other.regexCondition);
46 |     }
47 |
48 |     @Override
49 |     public String toString() {
50 |         return MoreObjects.toStringHelper(this).add("regexCondition", regexCondition).toString();
51 |     }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/NotUrlCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition;
2 |
3 | import net.jcip.annotations.Immutable;
4 |
5 | import java.util.Objects;
6 |
7 | import com.crawljax.browser.EmbeddedBrowser;
8 | import com.google.common.base.MoreObjects;
9 |
10 | /**
11 |  * Negated {@link UrlCondition}: true iff the browser's current url does NOT contain the given
12 |  * url. Note: Case insensitive.
13 |  *
14 |  * @author dannyroest@gmail.com (Danny Roest)
15 |  */
16 | @Immutable
17 | public class NotUrlCondition implements Condition {
18 |
19 |     private final UrlCondition urlCondition;
20 |
21 |     /**
22 |      * @param url
23 |      *            the URL (fragment) that must not be part of the current url.
24 |      */
25 |     public NotUrlCondition(String url) {
26 |         urlCondition = new UrlCondition(url);
27 |     }
28 |
29 |     @Override
30 |     public boolean check(EmbeddedBrowser browser) {
31 |         // Delegates to the wrapped condition through Logic.not.
32 |         return Logic.not(urlCondition).check(browser);
33 |     }
34 |
35 |     @Override
36 |     public int hashCode() {
37 |         return Objects.hash(getClass(), urlCondition);
38 |     }
39 |
40 |     @Override
41 |     public boolean equals(Object object) {
42 |         if (!(object instanceof NotUrlCondition)) {
43 |             return false;
44 |         }
45 |         NotUrlCondition other = (NotUrlCondition) object;
46 |         return Objects.equals(urlCondition, other.urlCondition);
47 |     }
48 |
49 |     @Override
50 |     public String toString() {
51 |         return MoreObjects.toStringHelper(this).add("urlCondition", urlCondition).toString();
52 |     }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/NotVisibleCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition;
2 |
3 | import net.jcip.annotations.Immutable;
4 |
5 | import java.util.Objects;
6 |
7 | import com.crawljax.browser.EmbeddedBrowser;
8 | import com.crawljax.core.state.Identification;
9 | import com.google.common.base.MoreObjects;
10 |
11 | /**
12 |  * Negated {@link VisibleCondition}: true iff the element found by the given identification is
13 |  * NOT visible.
14 |  *
15 |  * @author dannyroest@gmail.com (Danny Roest)
16 |  */
17 | @Immutable
18 | public class NotVisibleCondition implements Condition {
19 |
20 |     private final VisibleCondition visibleCondition;
21 |
22 |     /**
23 |      * @param identification
24 |      *            the identification of the element that must not be visible.
25 |      */
26 |     public NotVisibleCondition(Identification identification) {
27 |         this.visibleCondition = new VisibleCondition(identification);
28 |     }
29 |
30 |     @Override
31 |     public boolean check(EmbeddedBrowser browser) {
32 |         // Delegates to the wrapped condition through Logic.not.
33 |         return Logic.not(visibleCondition).check(browser);
34 |     }
35 |
36 |     @Override
37 |     public int hashCode() {
38 |         return Objects.hash(getClass(), visibleCondition);
39 |     }
40 |
41 |     @Override
42 |     public boolean equals(Object object) {
43 |         if (!(object instanceof NotVisibleCondition)) {
44 |             return false;
45 |         }
46 |         NotVisibleCondition that = (NotVisibleCondition) object;
47 |         return Objects.equals(this.visibleCondition, that.visibleCondition);
48 |     }
49 |
50 |     @Override
51 |     public String toString() {
52 |         return MoreObjects.toStringHelper(this)
53 |                 .add("visibleCondition", visibleCondition)
54 |                 .toString();
55 |     }
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/NotXPathCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition;
2 |
3 | import net.jcip.annotations.Immutable;
4 |
5 | import java.util.Objects;
6 |
7 | import com.crawljax.browser.EmbeddedBrowser;
8 | import com.google.common.base.MoreObjects;
9 |
10 | /**
11 |  * Negated {@link XPathCondition}: true iff no elements are found with the expression.
12 |  *
13 |  * @author dannyroest@gmail.com (Danny Roest)
14 |  */
15 | @Immutable
16 | public class NotXPathCondition implements Condition {
17 |
18 |     private final XPathCondition xpathCondition;
19 |
20 |     /**
21 |      * @param expression
22 |      *            the XPath expression that must not select any element.
23 |      */
24 |     public NotXPathCondition(String expression) {
25 |         xpathCondition = new XPathCondition(expression);
26 |     }
27 |
28 |     @Override
29 |     public boolean check(EmbeddedBrowser browser) {
30 |         // Delegates to the wrapped condition through Logic.not.
31 |         return Logic.not(xpathCondition).check(browser);
32 |     }
33 |
34 |     @Override
35 |     public int hashCode() {
36 |         return Objects.hash(getClass(), xpathCondition);
37 |     }
38 |
39 |     @Override
40 |     public boolean equals(Object object) {
41 |         if (!(object instanceof NotXPathCondition)) {
42 |             return false;
43 |         }
44 |         NotXPathCondition other = (NotXPathCondition) object;
45 |         return Objects.equals(xpathCondition, other.xpathCondition);
46 |     }
47 |
48 |     @Override
49 |     public String toString() {
50 |         return MoreObjects.toStringHelper(this).add("xpathCondition", xpathCondition).toString();
51 |     }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/UrlCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition;
2 |
3 | import net.jcip.annotations.Immutable;
4 |
5 | import java.util.Objects;
6 |
7 | import com.crawljax.browser.EmbeddedBrowser;
8 | import com.google.common.base.MoreObjects;
9 |
10 | /**
11 |  * Condition that returns true iff the browser's current url contains url. Note: Case insensitive.
12 |  *
13 |  * @author dannyroest@gmail.com (Danny Roest)
14 |  */
15 | @Immutable
16 | public class UrlCondition implements Condition {
17 |
18 |     private final String url;
19 |
20 |     /**
21 |      * @param url
22 |      *            the URL (fragment) to look for; it is matched case-insensitively.
23 |      */
24 |     public UrlCondition(String url) {
25 |         this.url = url;
26 |     }
27 |
28 |     @Override
29 |     public boolean check(EmbeddedBrowser browser) {
30 |         // Lower-case BOTH sides: lower-casing only the browser URL made every pattern that
31 |         // contains an upper-case character unmatchable. Locale.ROOT keeps the folding
32 |         // independent of the JVM's default locale.
33 |         String currentUrl = browser.getCurrentUrl().toLowerCase(java.util.Locale.ROOT);
34 |         return currentUrl.contains(url.toLowerCase(java.util.Locale.ROOT));
35 |     }
36 |
37 |     @Override
38 |     public int hashCode() {
39 |         return Objects.hash(getClass(), url);
40 |     }
41 |
42 |     @Override
43 |     public boolean equals(Object object) {
44 |         // Equality stays case-sensitive on the configured url, exactly as before.
45 |         if (object instanceof UrlCondition) {
46 |             UrlCondition that = (UrlCondition) object;
47 |             return Objects.equals(this.url, that.url);
48 |         }
49 |         return false;
50 |     }
51 |
52 |     @Override
53 |     public String toString() {
54 |         return MoreObjects.toStringHelper(this)
55 |                 .add("url", url)
56 |                 .toString();
57 |     }
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/VisibleCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition;
2 |
3 | import net.jcip.annotations.Immutable;
4 |
5 | import java.util.Objects;
6 |
7 | import com.crawljax.browser.EmbeddedBrowser;
8 | import com.crawljax.core.state.Identification;
9 | import com.google.common.base.MoreObjects;
10 |
11 | /**
12 |  * Condition that returns true iff the element found by the given identification is visible.
13 |  */
14 | @Immutable
15 | public class VisibleCondition implements Condition {
16 |
17 |     private final Identification identification;
18 |
19 |     /**
20 |      * @param identification
21 |      *            the identification of the element whose visibility is checked.
22 |      */
23 |     public VisibleCondition(Identification identification) {
24 |         this.identification = identification;
25 |     }
26 |
27 |     @Override
28 |     public boolean check(EmbeddedBrowser browser) {
29 |         // Visibility is decided entirely by the browser.
30 |         return browser.isVisible(identification);
31 |     }
32 |
33 |     @Override
34 |     public int hashCode() {
35 |         return Objects.hash(getClass(), identification);
36 |     }
37 |
38 |     @Override
39 |     public boolean equals(Object object) {
40 |         if (!(object instanceof VisibleCondition)) {
41 |             return false;
42 |         }
43 |         VisibleCondition other = (VisibleCondition) object;
44 |         return Objects.equals(identification, other.identification);
45 |     }
46 |
47 |     @Override
48 |     public String toString() {
49 |         return MoreObjects.toStringHelper(this).add("identification", identification).toString();
50 |     }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/browserwaiter/ExpectedCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition.browserwaiter;
2 |
3 | import net.jcip.annotations.ThreadSafe;
4 |
5 | import com.crawljax.browser.EmbeddedBrowser;
6 |
7 | /**
8 | * Interface for defining conditions to wait for.
9 | *
10 | * @author dannyroest@gmail.com (Danny Roest)
11 | */
12 | @ThreadSafe
13 | public interface ExpectedCondition {
14 |
15 | /**
16 | * Is the expected condition satisfied.
17 | *
18 | * @param browser
19 | * the browser to execute the check on
20 | * @return Whether the condition is satisfied.
21 | */
22 | boolean isSatisfied(EmbeddedBrowser browser);
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/browserwaiter/ExpectedElementCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition.browserwaiter;
2 |
3 | import net.jcip.annotations.ThreadSafe;
4 |
5 | import com.crawljax.browser.EmbeddedBrowser;
6 | import com.crawljax.core.state.Identification;
7 |
8 | /**
9 | * Checks whether an elements exists.
10 | *
11 | * @author dannyroest@gmail.com (Danny Roest)
12 | */
13 | @ThreadSafe
14 | public class ExpectedElementCondition implements ExpectedCondition {
15 |
16 | private final Identification identification;
17 |
18 | /**
19 | * Constructor.
20 | *
21 | * @param identification
22 | * the identification to use.
23 | */
24 | public ExpectedElementCondition(Identification identification) {
25 | this.identification = identification;
26 | }
27 |
28 | @Override
29 | public boolean isSatisfied(EmbeddedBrowser browser) {
30 | return browser.elementExists(identification);
31 | }
32 |
33 | @Override
34 | public String toString() {
35 | return this.getClass().getSimpleName() + ": " + this.identification;
36 | }
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/browserwaiter/ExpectedVisibleCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition.browserwaiter;
2 |
3 | import net.jcip.annotations.ThreadSafe;
4 |
5 | import com.crawljax.browser.EmbeddedBrowser;
6 | import com.crawljax.core.state.Identification;
7 |
8 | /**
9 | * Checks whether an element is visible.
10 | *
11 | * @author dannyroest@gmail.com (Danny Roest)
12 | */
13 | @ThreadSafe
14 | public class ExpectedVisibleCondition implements ExpectedCondition {
15 |
16 | private final Identification identification;
17 |
18 | /**
19 | * Constructor.
20 | *
21 | * @param identification
22 | * identification to use.
23 | */
24 | public ExpectedVisibleCondition(Identification identification) {
25 | this.identification = identification;
26 | }
27 |
28 | @Override
29 | public boolean isSatisfied(EmbeddedBrowser browser) {
30 | return browser.isVisible(identification);
31 | }
32 |
33 | @Override
34 | public String toString() {
35 | return this.getClass().getSimpleName() + ": " + this.identification;
36 | }
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/browserwaiter/WaitConditionChecker.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition.browserwaiter;
2 |
3 | import java.util.List;
4 |
5 | import javax.inject.Inject;
6 |
7 | import net.jcip.annotations.ThreadSafe;
8 |
9 | import org.slf4j.Logger;
10 | import org.slf4j.LoggerFactory;
11 |
12 | import com.crawljax.browser.EmbeddedBrowser;
13 | import com.crawljax.core.configuration.CrawlRules;
14 | import com.google.common.collect.ImmutableList;
15 |
16 | /**
17 | * Checks the wait conditions.
18 | */
19 | @ThreadSafe
20 | public class WaitConditionChecker {
21 |
22 | private static final Logger LOGGER = LoggerFactory.getLogger(WaitConditionChecker.class
23 | .getName());
24 |
25 | private ImmutableList waitConditions;
26 |
27 | @Inject
28 | public WaitConditionChecker(CrawlRules rules) {
29 | waitConditions = rules.getPreCrawlConfig().getWaitConditions();
30 | }
31 |
32 | /**
33 | * @return the waitConditions
34 | */
35 | public List getWaitConditions() {
36 | return waitConditions;
37 | }
38 |
39 | /**
40 | * @param browser
41 | * The browser to use.
42 | */
43 | public void wait(EmbeddedBrowser browser) {
44 | if (waitConditions == null) {
45 | return;
46 | }
47 | for (WaitCondition waitCondition : waitConditions) {
48 | LOGGER.info("Checking WaitCondition for url: {}", waitCondition.getUrl());
49 | waitCondition.testAndWait(browser);
50 | }
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/browserwaiter/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Waiter utils.
3 | */
4 | package com.crawljax.condition.browserwaiter;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/crawlcondition/CrawlCondition.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition.crawlcondition;
2 |
3 | import java.util.List;
4 |
5 | import com.crawljax.condition.Condition;
6 | import com.crawljax.condition.ConditionType;
7 |
8 | /**
9 | * A Crawl condition is a condition which should be satisfied in order to crawl the current state.
10 | *
11 | * @author Danny
12 | */
13 | public class CrawlCondition extends ConditionType {
14 |
15 | /**
16 | * @param description
17 | * Description of the condition.
18 | * @param crawlConditionCondition
19 | * Actual condition.
20 | */
21 | public CrawlCondition(String description, Condition crawlConditionCondition) {
22 | super(description, crawlConditionCondition);
23 | }
24 |
25 | /**
26 | * @param description
27 | * Description of the condition.
28 | * @param crawlConditionCondition
29 | * Actual condition.
30 | * @param preConditions
31 | * Preconditions to check first.
32 | */
33 | public CrawlCondition(String description, Condition crawlConditionCondition,
34 | Condition... preConditions) {
35 | super(description, crawlConditionCondition, preConditions);
36 | }
37 |
38 | /**
39 | * @param description
40 | * Description of the condition.
41 | * @param crawlConditionCondition
42 | * Actual condition.
43 | * @param preConditions
44 | * Preconditions to check first.
45 | */
46 | public CrawlCondition(String description, Condition crawlConditionCondition,
47 | List preConditions) {
48 | super(description, crawlConditionCondition, preConditions);
49 | }
50 |
51 | /**
52 | * @return Returns the actual crawl condition.
53 | */
54 | public Condition getCrawlCondition() {
55 | return getCondition();
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/crawlcondition/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Crawlcondition package.
3 | */
4 | package com.crawljax.condition.crawlcondition;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/eventablecondition/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Eventable condition utils.
3 | */
4 | package com.crawljax.condition.eventablecondition;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/condition/invariant/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Invariant util package.
3 | */
4 | package com.crawljax.condition.invariant;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/CandidateCrawlAction.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core;
2 |
3 | import com.crawljax.core.state.Eventable.EventType;
4 | import com.google.common.base.MoreObjects;
5 |
6 | /**
7 | * This class corresponds the combination of a CandidateElement and a single
8 | * eventType.
9 | *
10 | * @author Stefan Lenselink <S.R.Lenselink@student.tudelft.nl>
11 | */
12 | public class CandidateCrawlAction {
13 | private final CandidateElement candidateElement;
14 | private final EventType eventType;
15 |
16 | /**
17 | * The Constructor for the CandidateCrawlAction, build a new instance with
18 | * the CandidateElement and the EventType.
19 | *
20 | * @param candidateElement
21 | * the element to execute the eventType on
22 | * @param eventType
23 | * the eventType to execute on the Candidate Element.
24 | */
25 | public CandidateCrawlAction(CandidateElement candidateElement,
26 | EventType eventType) {
27 | this.candidateElement = candidateElement;
28 | this.eventType = eventType;
29 | }
30 |
31 | /**
32 | * @return the candidateElement
33 | */
34 | public CandidateElement getCandidateElement() {
35 | return candidateElement;
36 | }
37 |
38 | /**
39 | * @return the eventType
40 | */
41 | public EventType getEventType() {
42 | return eventType;
43 | }
44 |
45 | @Override
46 | public String toString() {
47 | return MoreObjects.toStringHelper(this)
48 | .add("candidateElement", candidateElement)
49 | .add("eventType", eventType).toString();
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/CrawlSessionNotSetupYetException.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core;
2 |
3 | import com.crawljax.core.state.StateVertex;
4 | import com.crawljax.di.CrawlSessionProvider;
5 |
6 | /**
7 | * {@link CrawljaxException} that is thrown when you call
8 | * {@link CrawlSessionProvider#get()} before the the initial (index)
9 | * {@link StateVertex} is crawled. Only after the index is crawled will the
10 | * {@link CrawlSession} be available.
11 | */
12 | @SuppressWarnings("serial")
13 | public class CrawlSessionNotSetupYetException extends CrawljaxException {
14 |
15 | public CrawlSessionNotSetupYetException() {
16 | super(
17 | "The crawlsession is not yet availeble. Wait until the index state is crawled.");
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/CrawlerLeftDomainException.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core;
2 |
3 | /**
4 | * Is thrown when the browser leaves the domain/scope while crawling.
5 | */
6 | @SuppressWarnings("serial")
7 | public class CrawlerLeftDomainException extends CrawljaxException {
8 |
9 | public CrawlerLeftDomainException(String currentUrl) {
10 | super("Somehow we left the domain/scope to " + currentUrl);
11 | }
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/CrawljaxException.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core;
2 |
/**
 * General unchecked exception for Crawljax.
 */
public class CrawljaxException extends RuntimeException {

    private static final long serialVersionUID = 8597985648361590779L;

    /**
     * Constructs a {@code CrawljaxException} with {@code null} as its detail message.
     */
    public CrawljaxException() {
        super();
    }

    /**
     * Constructs a new {@code CrawljaxException} with the specified detail message.
     *
     * @param message
     *            the detail message.
     */
    public CrawljaxException(final String message) {
        super(message);
    }

    /**
     * Constructs a new {@code CrawljaxException} with the specified detail message and cause.
     *
     * @param message
     *            the detail message.
     * @param cause
     *            the cause (a {@code null} value is permitted, and indicates that the cause is
     *            nonexistent or unknown).
     */
    public CrawljaxException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Constructs a new {@code CrawljaxException} with the specified cause and a detail message of
     * {@code (cause == null ? null : cause.toString())}.
     *
     * @param cause
     *            the cause (a {@code null} value is permitted, and indicates that the cause is
     *            nonexistent or unknown).
     */
    public CrawljaxException(final Throwable cause) {
        super(cause);
    }
}
53 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/CrawljaxRunner.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core;
2 |
3 | import static com.google.common.base.Preconditions.checkNotNull;
4 |
5 | import java.util.concurrent.Callable;
6 |
7 | import com.crawljax.core.ExitNotifier.ExitStatus;
8 | import com.crawljax.core.configuration.CrawljaxConfiguration;
9 | import com.crawljax.core.plugin.PostCrawlingPlugin;
10 | import com.crawljax.di.CoreModule;
11 | import com.google.inject.Guice;
12 | import com.google.inject.Injector;
13 |
14 | /**
15 | * Runs crawljax given a certain {@link CrawljaxConfiguration}. Run {@link #call()} to start a
16 | * crawl.
17 | */
18 | public class CrawljaxRunner implements Callable {
19 |
20 | private final CrawljaxConfiguration config;
21 | private CrawlController controller;
22 | private ExitStatus reason;
23 |
24 | public CrawljaxRunner(CrawljaxConfiguration config) {
25 | this.config = config;
26 | }
27 |
28 | /**
29 | * Runs Crawljax with the given configuration.
30 | *
31 | * @return The {@link CrawlSession} once the Crawl is done.
32 | */
33 | @Override
34 | public CrawlSession call() {
35 | Injector injector = Guice.createInjector(new CoreModule(config));
36 | controller = injector.getInstance(CrawlController.class);
37 | CrawlSession session = controller.call();
38 | reason = controller.getReason();
39 | return session;
40 | }
41 |
42 | /**
43 | * Stops Crawljax. It will try to shutdown gracefully and run the {@link PostCrawlingPlugin}s.
44 | */
45 | public void stop() {
46 | checkNotNull(controller, "Cannot stop Crawljax if you haven't started it");
47 | controller.stop();
48 | }
49 |
50 | /**
51 | * @return The {@link ExitStatus} Crawljax stopped or null
if it hasn't stopped
52 | * yet.
53 | */
54 | public ExitStatus getReason() {
55 | return reason;
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/StateUnreachableException.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core;
2 |
3 | import com.crawljax.core.state.StateVertex;
4 |
5 | /**
6 | * Gets thrown when Crawljax cannot get to a target {@link StateVertex}.
7 | */
8 | @SuppressWarnings("serial")
9 | public class StateUnreachableException extends CrawljaxException {
10 |
11 | private StateVertex target;
12 |
13 | public StateUnreachableException(StateVertex state, String reason) {
14 | super("Cannot reach state " + state.getName() + " because " + reason);
15 | this.target = state;
16 | }
17 |
18 | public StateVertex getTarget() {
19 | return target;
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/configuration/AcceptAllFramesChecker.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | /**
4 | * This class accepts all frames.
5 | *
6 | * @author Stefan Lenselink <slenselink@google.com>
7 | */
8 | public class AcceptAllFramesChecker implements IgnoreFrameChecker {
9 | @Override
10 | public boolean isFrameIgnored(String frameId) {
11 | return false;
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/configuration/ConfigurationHelper.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import java.util.List;
4 |
/**
 * Helper class for configurations.
 *
 * @author Danny Roest (dannyroest@gmail.com)
 */
public final class ConfigurationHelper {

    private static final String SEPARATOR = ", ";

    private ConfigurationHelper() {
    }

    /**
     * Joins the items with {@code ", "}, keeping empty strings as their own entries.
     *
     * @param items
     *            The items to be added to the string.
     * @return string representation of list. format: a, b, , c. Empty String allowed
     */
    public static String listToStringEmptyStringAllowed(List<String> items) {
        return String.join(SEPARATOR, items);
    }

    /**
     * Joins the string form of the items with {@code ", "}. A separator is only written once
     * the result is non-empty, so items with an empty string form at the start of the list
     * leave no trace in the output.
     *
     * @param items
     *            The items to be added to the string.
     * @return string representation of list. format: a, b, c
     */
    public static String listToString(List<?> items) {
        StringBuilder str = new StringBuilder();
        for (Object item : items) {
            // length() avoids materializing the whole string on every iteration.
            if (str.length() > 0) {
                str.append(SEPARATOR);
            }
            str.append(item.toString());
        }
        return str.toString();
    }

    /**
     * @param value
     *            The value to be converted
     * @return int value of boolean, true=1 false=0
     */
    public static int booleanToInt(boolean value) {
        return value ? 1 : 0;
    }

}
63 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/configuration/CrawlScope.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
/**
 * Decides whether a URL is in scope or not.
 *
 * URLs in scope are crawled during the crawling process.
 *
 * @since 3.7
 */
@FunctionalInterface
public interface CrawlScope {

    /**
     * Reports whether the given URL is in scope.
     *
     * Called during the crawl process, to know if the crawling process should crawl or backtrack.
     *
     * @param url
     *            the URL to check.
     * @return {@code true} if the given URL is in scope, {@code false} otherwise.
     */
    boolean isInScope(String url);
}
24 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/configuration/DefaultCrawlScope.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import java.net.URI;
4 | import java.util.Objects;
5 |
6 | import com.crawljax.util.UrlUtils;
7 | import com.google.common.base.MoreObjects;
8 | import com.google.common.base.Preconditions;
9 |
10 | /**
11 | * A {@code CrawlScope} that allows to crawl only under a given domain.
12 | *
13 | * @since 3.7
14 | */
15 | public class DefaultCrawlScope implements CrawlScope {
16 |
17 | private URI url;
18 |
19 | /**
20 | * Constructs a {@code DefaultCrawlScope} with the given URL.
21 | *
22 | * @param url
23 | * the URL with allowed domain, must not be {@code null}.
24 | */
25 | public DefaultCrawlScope(URI url) {
26 | Preconditions.checkNotNull(url);
27 | this.url = url;
28 | }
29 |
30 | /**
31 | * Gets the URL used for scope check.
32 | *
33 | * @return the URL used for scope check.
34 | */
35 | public URI getUrl() {
36 | return url;
37 | }
38 |
39 | @Override
40 | public boolean isInScope(String url) {
41 | return UrlUtils.isSameDomain(url, this.url);
42 | }
43 |
44 | @Override
45 | public int hashCode() {
46 | return Objects.hash(url);
47 | }
48 |
49 | @Override
50 | public boolean equals(Object object) {
51 | if (object instanceof DefaultCrawlScope) {
52 | DefaultCrawlScope that = (DefaultCrawlScope) object;
53 | return Objects.equals(this.url, that.url);
54 | }
55 | return false;
56 | }
57 |
58 | @Override
59 | public String toString() {
60 | return MoreObjects.toStringHelper(this).add("url", url).toString();
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/configuration/DefaultUnexpectedAlertHandler.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import org.openqa.selenium.WebDriver;
4 |
5 | /**
6 | * An {@link UnexpectedAlertHandler} that does not handle the alert and always retries the action.
7 | *
8 | * Should be used only if the alerts are automatically handled (accepted or dismissed) by the
9 | * selected {@code WebDriver}, for example, through the capability
10 | * {@link org.openqa.selenium.remote.CapabilityType#UNHANDLED_PROMPT_BEHAVIOUR
11 | * CapabilityType.UNHANDLED_PROMPT_BEHAVIOUR}.
12 | *
13 | * @since 3.8
14 | * @see #INSTANCE
15 | */
16 | public class DefaultUnexpectedAlertHandler implements UnexpectedAlertHandler {
17 |
18 | /**
19 | * The instance of {@code DefaultUnexpectedAlertHandler}.
20 | */
21 | public static final DefaultUnexpectedAlertHandler INSTANCE =
22 | new DefaultUnexpectedAlertHandler();
23 |
24 | @Override
25 | public boolean handleAlert(WebDriver browser, String alertText) {
26 | return true;
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/configuration/FormAction.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import com.crawljax.core.state.Eventable.EventType;
4 |
5 | /**
6 | * Represents a form action, e.g. a link that is clicked that handles the form NOTE: In general
7 | * FormAction is not designed to be instantiated directly.
8 | *
9 | * @author DannyRoest@gmail.com (Danny Roest)
10 | */
11 | public class FormAction {
12 |
13 | private CrawlElement crawlElement;
14 |
15 | /**
16 | * @param tagName
17 | * the tag name of the element
18 | * @return this CrawlElement
19 | */
20 | public CrawlElement beforeClickElement(String tagName) {
21 | this.crawlElement = new CrawlElement(EventType.click, tagName);
22 | return crawlElement;
23 | }
24 |
25 | /**
26 | * @return the crawlTag
27 | */
28 | protected CrawlElement getCrawlElement() {
29 | return crawlElement;
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/configuration/IgnoreFrameChecker.java:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Google Inc. All Rights Reserved.
2 |
3 | package com.crawljax.core.configuration;
4 |
/**
 * Decides whether a given frame must be ignored.
 *
 * @author Stefan Lenselink <slenselink@google.com>
 */
public interface IgnoreFrameChecker {

    /**
     * Must the frame with the given identifier be ignored?
     *
     * @param frameId
     *            the frame identifier
     * @return {@code true} if the specified frame identifier must be ignored
     */
    boolean isFrameIgnored(String frameId);
}
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/configuration/UnexpectedAlertHandler.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import org.openqa.selenium.UnhandledAlertException;
4 | import org.openqa.selenium.WebDriver;
5 |
6 | /**
7 | * A handler for unexpected/unhandled alerts ({@link UnhandledAlertException}).
8 | *
9 | * Allows {@link com.crawljax.browser.WebDriverBackedEmbeddedBrowser WebDriverBackedEmbeddedBrowser}
10 | * to handle unexpected/unhandled alerts when trying to execute browser actions, to continue or not
11 | * with the normal crawling process.
12 | *
13 | * @since 3.8
14 | */
15 | @FunctionalInterface
16 | public interface UnexpectedAlertHandler {
17 |
18 | /**
19 | * Handles the unexpected/unhandled alert and tells whether or not the browser action should be
20 | * retried.
21 | *
22 | * Called when an {@code UnhandledAlertException} is caught after trying to execute a browser
23 | * action.
24 | *
25 | * @param browser
26 | * the browser that was executing the action.
27 | * @param alertText
28 | * the text/message of the alert.
29 | * @return {@code true} if the action should be retried, {@code false} otherwise.
30 | */
31 | boolean handleAlert(WebDriver browser, String alertText);
32 | }
33 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/exception/BrowserConnectionException.java:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Google Inc. All Rights Reserved.
2 |
3 | package com.crawljax.core.exception;
4 |
5 | import org.openqa.selenium.WebDriverException;
6 |
7 | /**
8 | * This {@link RuntimeException} is thrown when a EmbeddedBrowser lost connection to its underlying
9 | * implementation and so crashed.
10 | *
11 | * @author slenselink@google.com (Stefan Lenselink)
12 | */
13 | public class BrowserConnectionException extends RuntimeException {
14 |
15 | /**
16 | * Generated serial version UID
17 | */
18 | private static final long serialVersionUID = -5149214539340150056L;
19 |
20 | /**
21 | * Create a new BrowserConnectionException based on a previous catched RuntimeException.
22 | *
23 | * @param exception
24 | * the original exception to wrap.
25 | */
26 | public BrowserConnectionException(WebDriverException exception) {
27 | super(exception);
28 | }
29 | }
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/exception/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This is the exception package, containing the Exceptions used by Crawljax.
3 | */
4 | package com.crawljax.core.exception;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/DomChangeNotifierPlugin.java:
--------------------------------------------------------------------------------
1 | /**
2 | * A plugin interface to provide an extension point for comparing the current state with the new
3 | * state induced after firing the event. Note that if you add more than one instance of this type of
4 | * plugin to crawljax, only the last added instance will be used for performing the DOM comparison
5 | * and all others will be ignored.
6 | */
7 | package com.crawljax.core.plugin;
8 |
9 | import com.crawljax.core.CrawlerContext;
10 | import com.crawljax.core.state.Eventable;
11 |
12 | /**
13 | * This plugins lets you override the default state comparison that Crawljax uses.
14 | *
15 | * @deprecated Allthough new states are selected based on this plugin, the actual state comparison used by the
16 | * backing StateFlowGraph is uses the {@link Object#hashCode()} and {@link Object#equals(Object)} function of the
17 | * {@link com.crawljax.core.state.StateVertex}. To implement correct behaviour, do note use this class but specify a
18 | * custom {@link com.crawljax.core.state.StateVertexFactory} in the
19 | * {@link com.crawljax.core.configuration.CrawljaxConfiguration}. This method will be removed in Crawljax 4.x
20 | */
21 | @Deprecated
22 | public interface DomChangeNotifierPlugin extends Plugin {
23 |
24 | /**
25 | * Check to see if the (new) DOM is changed with regards to the old DOM.
26 | *
27 | * This method can be called from multiple threads with different {@link CrawlerContext}
28 | *
29 | *
30 | * @param context The Crawler context.
31 | * @param domBefore the state before the event.
32 | * @param domAfter the state after the event.
33 | * @return true if the state is changed according to the compare method of the oracle.
34 | * @deprecated See class documentation. This method will be removed in Crawljax 4.x
35 | */
36 | @Deprecated
37 | boolean isDomChanged(CrawlerContext context, String domBefore, Eventable e, String domAfter);
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/GeneratesOutput.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
/**
 * Adds a {@code setOutputFolder} and a {@code getOutputFolder} method so the user can set the
 * directory a plugin should use for its output. Note that the output path should be used as an
 * absolute path.
 */
public interface GeneratesOutput {

    /**
     * Sets the absolute output directory that should be used by this plugin.
     *
     * @param absolutePath
     *            The path to the output directory to use.
     */
    void setOutputFolder(String absolutePath);

    /**
     * Gets the absolute path of the output directory that was specified by the user.
     *
     * @return The path to use for writing files to.
     */
    String getOutputFolder();
}
25 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/HostInterface.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import java.io.File;
4 | import java.util.Map;
5 |
/**
 * Gives access to an output directory and a map of string parameters.
 */
public interface HostInterface {

    /**
     * @return the output directory.
     */
    File getOutputDirectory();

    /**
     * @return the parameters, keyed by name.
     */
    Map<String, String> getParameters();
}
11 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/HostInterfaceImpl.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import java.io.File;
4 | import java.util.Map;
5 |
6 | public class HostInterfaceImpl implements HostInterface {
7 |
8 | private File outputDirectory;
9 | private Map parameters;
10 |
11 | public HostInterfaceImpl(File outputDirectory, Map parameters) {
12 | this.outputDirectory = outputDirectory;
13 | this.parameters = parameters;
14 | }
15 |
16 | @Override
17 | public File getOutputDirectory() {
18 | return outputDirectory;
19 | }
20 |
21 | public void setOutputDirectory(File outputDirectory) {
22 | this.outputDirectory = outputDirectory;
23 | }
24 |
25 | @Override
26 | public Map getParameters() {
27 | return parameters;
28 | }
29 |
30 | public void setParameters(Map parameters) {
31 | this.parameters = parameters;
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/OnBrowserCreatedPlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import com.crawljax.browser.EmbeddedBrowser;
4 |
/**
 * This interface denotes the Plugin type that is executed every time a new Browser is created.
 * This can be used to do login, database changes, statistics etc. every time a new browser is
 * started.
 *
 * @author Stefan Lenselink <S.R.Lenselink@student.tudelft.nl>
 */
public interface OnBrowserCreatedPlugin extends Plugin {

	/**
	 * This method is executed when a new browser has been created and is ready to be used by the
	 * Crawler. The PreCrawling plugins are executed before these plugins, but the PreCrawling
	 * plugins are only executed on the first created browser, while this plugin is executed on
	 * every new browser.
	 *
	 * @param newBrowser
	 *            the newly created browser object
	 */
	void onBrowserCreated(EmbeddedBrowser newBrowser);

}
27 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/OnFireEventFailedPlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import java.util.List;
4 |
5 | import com.crawljax.core.CrawlerContext;
6 | import com.crawljax.core.state.Eventable;
7 |
8 | /**
9 | * Plugin type that is called every time event that was requested to fire failed firing.
10 | */
11 | public interface OnFireEventFailedPlugin extends Plugin {
12 |
13 | /**
14 | * Method that is called when an event that was requested to fire failed firing.
15 | *
16 | * This method can be called from multiple threads with different {@link CrawlerContext}
17 | *
18 | *
19 | * @param context
20 | * The per crawler context.
21 | * @param eventable
22 | * the eventable that failed to execute
23 | * @param pathToFailure
24 | * the list of eventable lead TO this failed eventable, the eventable excluded.
25 | */
26 | void onFireEventFailed(CrawlerContext context, Eventable eventable,
27 | List pathToFailure);
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/OnInvariantViolationPlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import com.crawljax.condition.invariant.Invariant;
4 | import com.crawljax.core.CrawlerContext;
5 |
/**
 * Plugin type that is called every time an invariant is violated. Invariants are checked after each
 * detected state change.
 */
public interface OnInvariantViolationPlugin extends Plugin {

	/**
	 * Method that is called when an invariant is violated.
	 * <p>
	 * This method can be called from multiple threads with different {@link CrawlerContext}
	 * </p>
	 *
	 * @param invariant
	 *            the failed invariant.
	 * @param context
	 *            the per crawler context in which the violation was detected.
	 */
	void onInvariantViolation(Invariant invariant, CrawlerContext context);

}
26 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/OnNewStatePlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import com.crawljax.core.CrawlerContext;
4 | import com.crawljax.core.state.StateFlowGraph;
5 | import com.crawljax.core.state.StateVertex;
6 |
/**
 * Plugin type that is called every time a new state is found by Crawljax. This also happens for the
 * Index State. Example: DOM validation.
 */
public interface OnNewStatePlugin extends Plugin {

	/**
	 * Method that is called when a new state is found. When this method is called the state is
	 * already added to the {@link StateFlowGraph}.
	 * <p>
	 * This method can be called from multiple threads with different {@link CrawlerContext}
	 * </p>
	 *
	 * @param context
	 *            the current context.
	 * @param newState
	 *            The new state. Equivalent to {@link CrawlerContext#getCurrentState()}.
	 */
	void onNewState(CrawlerContext context, StateVertex newState);

}
28 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/OnRevisitStatePlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import com.crawljax.core.CrawlerContext;
4 | import com.crawljax.core.state.StateVertex;
5 |
/**
 * Plugin type that is called every time a state is revisited by Crawljax. Example: Benchmarking.
 * This plugin receives the current state explicitly because, according to the original note, the
 * session's current state is not the correct one while Crawljax is in the back-tracking phase.
 */
public interface OnRevisitStatePlugin extends Plugin {

	/**
	 * Method that is called every time a state is revisited by Crawljax. Warning: changing the
	 * state can influence Crawljax, it is not a copy.
	 * <p>
	 * This method can be called from multiple threads with different {@link CrawlerContext}
	 * </p>
	 *
	 * @param context
	 *            the per crawler context
	 * @param currentState
	 *            the state revisited
	 */
	void onRevisitState(CrawlerContext context, StateVertex currentState);

}
28 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/OnUrlLoadPlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import com.crawljax.core.CrawlerContext;
4 |
/**
 * Plugin type that is called after the initial URL is (re)loaded. Example: refreshing the page
 * (clear the browser cache). The OnUrlLoadPlugins are run just after the Browser has gone to the
 * initial URL. Not only the first time but also every time the Core navigates back (back-tracking).
 */
public interface OnUrlLoadPlugin extends Plugin {

	/**
	 * Method that is called after the URL is (re)loaded. Warning: changing the browser can change
	 * the behaviour of Crawljax. It is not a copy!
	 * <p>
	 * This method can be called from multiple threads with different {@link CrawlerContext}
	 * </p>
	 *
	 * @param context
	 *            the current crawler context.
	 */
	void onUrlLoad(CrawlerContext context);

}
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/Plugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
/**
 * Main interface for all types of plugins. Plugin types include:
 * <table>
 * <tr>
 * <th>Type</th>
 * <th>Executed</th>
 * <th>Examples</th>
 * </tr>
 * <tr>
 * <td>OnNewStatePlugin</td>
 * <td>When a new state is found while crawling</td>
 * <td>Create Screenshots, Validate DOM</td>
 * </tr>
 * <tr>
 * <td>OnRevisitStatePlugin</td>
 * <td>When a state is revisited</td>
 * <td>Crawljax benchmarking</td>
 * </tr>
 * <tr>
 * <td>OnUrlLoadPlugin</td>
 * <td>After the initial URL is (re)loaded</td>
 * <td>Reset back-end state</td>
 * </tr>
 * <tr>
 * <td>OnInvariantViolationPlugin</td>
 * <td>When an invariant fails validation</td>
 * <td>Report builder</td>
 * </tr>
 * <tr>
 * <td>PreStateCrawlingPlugin</td>
 * <td>Before a new state is crawled</td>
 * <td>Logging candidate elements</td>
 * </tr>
 * <tr>
 * <td>PostCrawlingPlugin</td>
 * <td>After the crawling</td>
 * <td>Generating tests from the state machine</td>
 * </tr>
 * <tr>
 * <td>ProxyServerPlugin</td>
 * <td>Before the crawling, at the initialization of the core</td>
 * <td>Loading a custom proxy configuration in the used browser</td>
 * </tr>
 * </table>
 * NOTE(review): the original text claimed "8 different types" while listing seven rows; confirm
 * the table against the plugin interfaces that actually exist in this package.
 */
public interface Plugin {

}
54 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/PostCrawlingPlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import com.crawljax.core.CrawlSession;
4 | import com.crawljax.core.ExitNotifier.ExitStatus;
5 |
/**
 * Plugin type that is called after the crawling phase is finished. Examples: report generation,
 * test generation
 */
public interface PostCrawlingPlugin extends Plugin {

	/**
	 * Method that is called after the crawling is finished. Warning: changing the session can
	 * change the behavior of other post crawl plugins. It is not a copy!
	 *
	 * @param session
	 *            the crawl session.
	 * @param exitReason
	 *            The {@link ExitStatus} with which Crawljax stopped.
	 */
	void postCrawling(CrawlSession session, ExitStatus exitReason);

}
24 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/PreCrawlingPlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import java.net.URL;
4 |
5 | import com.crawljax.core.configuration.CrawljaxConfiguration;
6 |
/**
 * {@link Plugin} that is called before the crawling starts and before the initial URL has been
 * loaded. This kind of plugin can be used for 'once in a crawl session' operations, for example
 * logging in to a web application or resetting the database to a 'clean' state.
 */
public interface PreCrawlingPlugin extends Plugin {

	/**
	 * Method that is called before Crawljax loads the initial {@link URL} and before the core
	 * starts crawling.
	 *
	 * @param config
	 *            The {@link CrawljaxConfiguration} for the coming crawl.
	 * @throws RuntimeException
	 *             declared by the signature; NOTE(review): how the core reacts to it is not
	 *             visible here.
	 */
	void preCrawling(CrawljaxConfiguration config) throws RuntimeException;
}
23 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/plugin/PreStateCrawlingPlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.plugin;
2 |
3 | import com.crawljax.core.CandidateElement;
4 | import com.crawljax.core.CrawlerContext;
5 | import com.crawljax.core.state.StateVertex;
6 | import com.google.common.collect.ImmutableList;
7 |
8 | /**
9 | * Plugin type that is called before firing events on the current DOM state.
10 | */
11 | public interface PreStateCrawlingPlugin extends Plugin {
12 |
13 | /**
14 | * Method that is called before firing events on the current DOM state. Warning the session and
15 | * candidateElements are not clones, changes will result in changed behavior.
16 | *
17 | * This method can be called from multiple threads with different {@link CrawlerContext}
18 | *
19 | *
20 | * @param context
21 | * the current session data.
22 | * @param candidateElements
23 | * the candidates for the current state.
24 | * @param state
25 | * The state being crawled
26 | */
27 | void preStateCrawling(CrawlerContext context,
28 | ImmutableList candidateElements, StateVertex state);
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/state/DefaultStateVertexFactory.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.state;
2 |
/**
 * The default factory that creates State vertexes with a {@link Object#hashCode()} and {@link Object#equals(Object)}
 * function based on the Stripped dom.
 */
public class DefaultStateVertexFactory extends StateVertexFactory {

	/**
	 * Creates a {@link StateVertexImpl}, passing all arguments through unchanged.
	 */
	@Override
	public StateVertex newStateVertex(int id, String url, String name, String dom, String strippedDom) {
		return new StateVertexImpl(id, url, name, dom, strippedDom);
	}
}
14 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/core/state/StateVertexFactory.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.state;
2 |
/**
 * A factory that creates a {@link com.crawljax.core.state.StateVertex}. This factory can be implemented
 * if you want to use custom states that use a different {@link Object#hashCode()} or {@link Object#equals(Object)}
 * method.
 */
public abstract class StateVertexFactory {

	/**
	 * Defines a State.
	 *
	 * @param id          the identifier of the state
	 * @param url         the current url of the state
	 * @param name        the name of the state
	 * @param dom         the current DOM tree of the browser
	 * @param strippedDom the stripped dom by the OracleComparators
	 * @return the newly created {@link StateVertex}.
	 */
	public abstract StateVertex newStateVertex(int id, String url, String name, String dom, String strippedDom);


	/**
	 * Creates the index state by delegating to
	 * {@link #newStateVertex(int, String, String, String, String)} with
	 * {@link StateVertex#INDEX_ID} as id and {@code "index"} as name.
	 *
	 * @param url         the url of the index state
	 * @param dom         the DOM of the index state
	 * @param strippedDom the stripped DOM of the index state
	 * @return The index {@link StateVertex}.
	 */
	public StateVertex createIndex(String url, String dom, String strippedDom) {
		return newStateVertex(StateVertex.INDEX_ID, url, "index", dom, strippedDom);
	}
}
28 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/forms/InputValue.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.forms;
2 |
3 | /**
4 | * Value for a FormInput.
5 | *
6 | * @author dannyroest@gmail.com (Danny Roest)
7 | */
8 | public class InputValue {
9 |
10 | private long id;
11 | private String value;
12 | private boolean checked = false;
13 |
14 | /**
15 | * default constructor.
16 | */
17 | public InputValue() {
18 |
19 | }
20 |
21 | /**
22 | * @param value
23 | * the text value
24 | */
25 | public InputValue(String value) {
26 | this(value, true);
27 | }
28 |
29 | /**
30 | * Created a form input value.
31 | *
32 | * @param value
33 | * the text value
34 | * @param checked
35 | * whether the element should be checked
36 | */
37 | public InputValue(String value, boolean checked) {
38 | this.value = value;
39 | this.checked = checked;
40 | }
41 |
42 | @Override
43 | public String toString() {
44 | return getValue();
45 | }
46 |
47 | /**
48 | * @return the id
49 | */
50 | public long getId() {
51 | return id;
52 | }
53 |
54 | /**
55 | * @param id
56 | * the id to set
57 | */
58 | public void setId(long id) {
59 | this.id = id;
60 | }
61 |
62 | /**
63 | * @return the value
64 | */
65 | public String getValue() {
66 | return value;
67 | }
68 |
69 | /**
70 | * @param value
71 | * the value to set
72 | */
73 | public void setValue(String value) {
74 | this.value = value;
75 | }
76 |
77 | /**
78 | * @return the checked
79 | */
80 | public boolean isChecked() {
81 | return checked;
82 | }
83 |
84 | /**
85 | * @param checked
86 | * the checked to set
87 | */
88 | public void setChecked(boolean checked) {
89 | this.checked = checked;
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/forms/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Forms package.
3 | */
4 | package com.crawljax.forms;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/metrics/MetricsModule.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.metrics;
2 |
3 | import com.codahale.metrics.Metric;
4 | import com.codahale.metrics.MetricRegistry;
5 | import com.google.inject.AbstractModule;
6 | import com.google.inject.Module;
7 |
8 | /**
9 | * The module used for setting up Metrics.
10 | */
11 | public class MetricsModule extends AbstractModule implements Module {
12 |
13 | /**
14 | * The prefix for a {@link Metric} concerning Crawljax.
15 | */
16 | public static final String CRAWL_PREFIX = "com.crawljax.crawl";
17 |
18 | /**
19 | * The prefix for a {@link Metric} concerning the events during a crawl.
20 | */
21 | public static final String EVENTS_PREFIX = CRAWL_PREFIX + "events.";
22 |
23 | /**
24 | * The prefix for a {@link Metric} concerning the plugins.
25 | */
26 | public static final String PLUGINS_PREFIX = CRAWL_PREFIX + "plugins.";
27 |
28 | @Override
29 | protected void configure() {
30 | bind(MetricRegistry.class).asEagerSingleton();
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/oraclecomparator/AbstractComparator.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.oraclecomparator;
2 |
3 | import java.util.List;
4 |
5 | import net.jcip.annotations.NotThreadSafe;
6 |
7 | import org.custommonkey.xmlunit.Difference;
8 |
9 | import com.crawljax.util.DomUtils;
10 |
11 | /**
12 | * The Abstract base class of all the Comparators. All comparators are not Thread safe as
13 | * comparators are shared between Threads and the origionalDom and newDom can not be final.
14 | */
15 | @NotThreadSafe
16 | public abstract class AbstractComparator implements Comparator {
17 |
18 | @Override
19 | public List getDifferences(String oldDom, String newDom) {
20 | return DomUtils.getDifferences(normalize(oldDom), normalize(newDom));
21 | }
22 |
23 | @Override
24 | public boolean isEquivalent(String oldDom, String newDom) {
25 | boolean equivalent = false;
26 | if (StateComparator.COMPARE_IGNORE_CASE) {
27 | equivalent = normalize(oldDom).equalsIgnoreCase(normalize(newDom));
28 | } else {
29 | equivalent = normalize(oldDom).equals(normalize(newDom));
30 | }
31 | return equivalent;
32 | }
33 |
34 | /**
35 | * Override this method to apply normalization to the comparison.
36 | *
37 | * @param dom
38 | * The original DOM
39 | * @return the normalized DOM.
40 | */
41 | @Override
42 | public String normalize(String dom) {
43 | return dom;
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/oraclecomparator/Comparator.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.oraclecomparator;
2 |
3 | import java.util.List;
4 |
5 | import net.jcip.annotations.NotThreadSafe;
6 |
7 | import org.custommonkey.xmlunit.Difference;
8 |
9 | /**
10 | * Interface for oracle comparators.
11 | */
12 | @NotThreadSafe
13 | public interface Comparator {
14 |
15 | /**
16 | * @return The differences between the two DOMs
17 | */
18 | List getDifferences(String oldDom, String newDom);
19 |
20 | /**
21 | * @return if the originalDom and the newDom are equivalent
22 | */
23 | boolean isEquivalent(String oldDom, String newDom);
24 |
25 | /**
26 | * @return The normalized DOM, on which the comparison is made.
27 | */
28 | String normalize(String dom);
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/oraclecomparator/comparators/AttributeComparator.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.oraclecomparator.comparators;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | import com.crawljax.oraclecomparator.AbstractComparator;
7 |
8 | /**
9 | * Oracle Comparator that ignores the specified attributes.
10 | */
11 | public class AttributeComparator extends AbstractComparator {
12 |
13 | private final List ignoreAttributes = new ArrayList();
14 |
15 | /**
16 | * @param attributes
17 | * the attributes to ignore
18 | */
19 | public AttributeComparator(String... attributes) {
20 | for (String attribute : attributes) {
21 | ignoreAttributes.add(attribute);
22 | }
23 | }
24 |
25 | @Override
26 | public String normalize(String dom) {
27 | String strippedDom = dom;
28 | for (String attribute : ignoreAttributes) {
29 | String regExp = "\\s" + attribute + "=\"[^\"]*\"";
30 | strippedDom = strippedDom.replaceAll(regExp, "");
31 | }
32 | return strippedDom;
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/oraclecomparator/comparators/PlainStructureComparator.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.oraclecomparator.comparators;
2 |
3 | import com.crawljax.oraclecomparator.AbstractComparator;
4 |
5 | public class PlainStructureComparator extends AbstractComparator {
6 |
7 | private final boolean removeAttributes;
8 |
9 | public PlainStructureComparator() {
10 | this(true);
11 | }
12 |
13 | public PlainStructureComparator(boolean removeAttributes) {
14 | this.removeAttributes = removeAttributes;
15 | }
16 |
17 | @Override
18 | public String normalize(String dom) {
19 | String normalized = dom;
20 | if (removeAttributes) {
21 | normalized = stripAttributes(normalized);
22 | }
23 | return stripContent(normalized);
24 | }
25 |
26 | private String stripAttributes(String string) {
27 | String regExAttributes = "<(.+?)(\\s.*?)?(/)?>";
28 | String ret = string.replaceAll(regExAttributes, "<$1$3>");
29 | return ret;
30 | }
31 |
32 | private String stripContent(String string) {
33 | String strippedStr;
34 |
35 | // remove linebreaks
36 | strippedStr = string.replaceAll("[\\t\\n\\x0B\\f\\r]", "");
37 |
38 | // remove content
39 | strippedStr = strippedStr.replaceAll(">(.*?)<", "><");
40 | return strippedStr;
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/oraclecomparator/comparators/RegexComparator.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.oraclecomparator.comparators;
2 |
3 | import java.util.Collection;
4 |
5 | import com.crawljax.oraclecomparator.AbstractComparator;
6 | import com.crawljax.util.DomUtils;
7 | import com.google.common.collect.ImmutableList;
8 |
9 | /**
10 | * Regex oracles that strips content from the DOM to check whether the DOMs are equal without the
11 | * specified regular expressions.
12 | *
13 | * @author dannyroest@gmail.com (Danny Roest)
14 | */
15 | public class RegexComparator extends AbstractComparator {
16 |
17 | // NOTE: the ordering can be important
18 | private final ImmutableList regexs;
19 |
20 | public RegexComparator(Collection regexs) {
21 | this.regexs = ImmutableList.copyOf(regexs);
22 | }
23 |
24 | public RegexComparator(String... regexs) {
25 | this.regexs = ImmutableList.copyOf(regexs);
26 | }
27 |
28 | @Override
29 | public String normalize(String dom) {
30 | String normalized = dom;
31 | for (String regex : regexs) {
32 | normalized = DomUtils.replaceString(normalized, regex, "");
33 | }
34 | return normalized;
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/oraclecomparator/comparators/ScriptComparator.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.oraclecomparator.comparators;
2 |
3 | import java.io.IOException;
4 |
5 | import org.slf4j.Logger;
6 | import org.slf4j.LoggerFactory;
7 | import org.w3c.dom.Document;
8 |
9 | import com.crawljax.oraclecomparator.AbstractComparator;
10 | import com.crawljax.util.DomUtils;
11 |
12 | public class ScriptComparator extends AbstractComparator {
13 |
14 | private static final Logger LOGGER = LoggerFactory.getLogger(AbstractComparator.class
15 | .getName());
16 |
17 | @Override
18 | public String normalize(String dom) {
19 | Document orgDoc;
20 | try {
21 | orgDoc = DomUtils.asDocument(dom);
22 | orgDoc = DomUtils.removeScriptTags(orgDoc);
23 | return DomUtils.getDocumentToString(orgDoc);
24 | } catch (IOException e) {
25 | LOGGER.warn("Could not perform DOM comparison", e);
26 | return dom;
27 | }
28 |
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/oraclecomparator/comparators/SimpleComparator.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.oraclecomparator.comparators;
2 |
3 | import com.crawljax.oraclecomparator.AbstractComparator;
4 |
5 | /**
6 | * Simple oracle which can ignore whitespaces and linebreaks.
7 | */
8 | public class SimpleComparator extends AbstractComparator {
9 |
10 | /**
11 | * Default argument less constructor.
12 | */
13 | public SimpleComparator() {
14 | super();
15 | }
16 |
17 | @Override
18 | public String normalize(String string) {
19 | String strippedStr;
20 |
21 | // remove linebreaks
22 | strippedStr = string.replaceAll("[\\t\\n\\x0B\\f\\r]", "");
23 |
24 | // remove just before and after elements spaces
25 | strippedStr = strippedStr.replaceAll(">[ ]*", ">");
26 | strippedStr = strippedStr.replaceAll("[ ]*<", "<");
27 |
28 | return strippedStr;
29 | }
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/oraclecomparator/comparators/package-info.java:
--------------------------------------------------------------------------------
1 | /**
 * Oracle comparators package.
3 | */
4 | package com.crawljax.oraclecomparator.comparators;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/oraclecomparator/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Oracle package.
3 | */
4 | package com.crawljax.oraclecomparator;
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/util/DOMComparer.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.util;
2 |
3 | import java.util.List;
4 |
5 | import org.custommonkey.xmlunit.DetailedDiff;
6 | import org.custommonkey.xmlunit.Diff;
7 | import org.custommonkey.xmlunit.Difference;
8 | import org.w3c.dom.Document;
9 |
10 | /**
11 | * This class allows to compare two Document objects and save the differences in a list.
12 | *
13 | * @author beze232056
14 | */
15 | public class DOMComparer {
16 | /**
17 | * The control document. This is used as a base to compare the testDOM with.
18 | */
19 | private final Document controlDOM;
20 |
21 | /**
22 | * The test document. This is the document in which we want to detect differences.
23 | */
24 | private final Document testDOM;
25 |
26 | /**
27 | * Constructor.
28 | *
29 | * @param controlDOM
30 | * The control DOM.
31 | * @param testDOM
32 | * The test DOM.
33 | */
34 | public DOMComparer(Document controlDOM, Document testDOM) {
35 | this.controlDOM = controlDOM;
36 | this.testDOM = testDOM;
37 | }
38 |
39 | /**
40 | * Compare the controlDOM and testDOM and save and return the differences in a list.
41 | *
42 | * @return list with differences
43 | */
44 | @SuppressWarnings("unchecked")
45 | public List compare() {
46 | Diff diff = new Diff(this.controlDOM, this.testDOM);
47 | DetailedDiff detDiff = new DetailedDiff(diff);
48 | return detDiff.getAllDifferences();
49 | }
50 |
51 | }
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/util/DomDifferenceListener.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.util;
2 |
3 | import java.util.List;
4 |
5 | import org.custommonkey.xmlunit.Difference;
6 | import org.custommonkey.xmlunit.DifferenceListener;
7 | import org.w3c.dom.Node;
8 |
9 | final class DomDifferenceListener implements DifferenceListener {
10 | private final List ignoreAttributes;
11 |
12 | DomDifferenceListener(List ignoreAttributes) {
13 | this.ignoreAttributes = ignoreAttributes;
14 | }
15 |
16 | @Override
17 | public void skippedComparison(Node control, Node test) {
18 | }
19 |
20 | @Override
21 | public int differenceFound(Difference difference) {
22 | if (difference.getControlNodeDetail() == null
23 | || difference.getControlNodeDetail().getNode() == null
24 | || difference.getTestNodeDetail() == null
25 | || difference.getTestNodeDetail().getNode() == null) {
26 | return RETURN_ACCEPT_DIFFERENCE;
27 | }
28 | if (ignoreAttributes.contains(difference.getTestNodeDetail().getNode()
29 | .getNodeName())
30 | || ignoreAttributes.contains(difference.getControlNodeDetail()
31 | .getNode().getNodeName())) {
32 | return RETURN_IGNORE_DIFFERENCE_NODES_IDENTICAL;
33 | }
34 | return RETURN_ACCEPT_DIFFERENCE;
35 | }
36 | }
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/util/HtmlNamespace.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Created Aug 7, 2008
3 | */
4 | package com.crawljax.util;
5 |
6 | import java.util.Iterator;
7 |
8 | import javax.xml.XMLConstants;
9 | import javax.xml.namespace.NamespaceContext;
10 |
/**
 * {@link NamespaceContext} that resolves the {@code html} and {@code xml} prefixes. Only
 * prefix-to-URI lookup is supported.
 *
 * @author mesbah
 */
public class HtmlNamespace implements NamespaceContext {
	/**
	 * @param prefix
	 *            The prefix of the URI.
	 * @return The XHTML namespace URI for {@code "html"}, {@link XMLConstants#XML_NS_URI} for
	 *         {@code "xml"}, and {@link XMLConstants#NULL_NS_URI} for any other prefix.
	 * @throws NullPointerException
	 *             when the prefix is {@code null}.
	 */
	@Override
	public String getNamespaceURI(String prefix) {
		if (prefix == null) {
			throw new NullPointerException("Null prefix");
		}
		if ("html".equals(prefix)) {
			return "http://www.w3.org/1999/xhtml";
		}
		if ("xml".equals(prefix)) {
			return XMLConstants.XML_NS_URI;
		}
		// An unbound prefix maps to the "no namespace" URI, not to a prefix constant.
		return XMLConstants.NULL_NS_URI;
	}

	/**
	 * Not supported; this method isn't necessary for XPath processing.
	 *
	 * @param uri
	 *            The namespace URI to look up.
	 * @return never returns normally.
	 * @throws UnsupportedOperationException
	 *             always.
	 */
	@Override
	public String getPrefix(String uri) {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not supported; this method isn't necessary for XPath processing either.
	 *
	 * @param uri
	 *            The namespace URI to look up.
	 * @return never returns normally.
	 * @throws UnsupportedOperationException
	 *             always.
	 */
	@Override
	public Iterator<String> getPrefixes(String uri) {
		throw new UnsupportedOperationException();
	}
}
56 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/util/XMLObject.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.util;
2 |
3 | import java.beans.XMLDecoder;
4 | import java.beans.XMLEncoder;
5 | import java.io.FileInputStream;
6 | import java.io.FileNotFoundException;
7 | import java.io.FileOutputStream;
8 |
/**
 * XMLObject helper: stores objects in, and loads objects from, XML files using
 * {@link XMLEncoder} and {@link XMLDecoder}.
 */
public final class XMLObject {

	private XMLObject() {

	}

	/**
	 * Converts an object to an XML file.
	 *
	 * @param object
	 *            The object to convert.
	 * @param fname
	 *            The filename where to save it to.
	 * @throws FileNotFoundException
	 *             When the file cannot be opened for writing.
	 */
	public static void objectToXML(Object object, String fname) throws FileNotFoundException {
		// try-with-resources closes the encoder (and with it the file) even if writing fails.
		try (XMLEncoder encoder = new XMLEncoder(new FileOutputStream(fname))) {
			encoder.writeObject(object);
		}
	}

	/**
	 * Converts an XML file to an object.
	 * <p>
	 * NOTE(review): {@link XMLDecoder} can instantiate arbitrary classes and invoke arbitrary
	 * methods described in the file; only use this on files from a trusted source.
	 * </p>
	 *
	 * @param fname
	 *            The filename to read the object from.
	 * @throws FileNotFoundException
	 *             When the file cannot be opened for reading.
	 * @return The first object stored in the file.
	 */
	public static Object xmlToObject(String fname) throws FileNotFoundException {
		// try-with-resources closes the decoder (and with it the file) even if reading fails.
		try (XMLDecoder decoder = new XMLDecoder(new FileInputStream(fname))) {
			return decoder.readObject();
		}
	}

}
53 |
--------------------------------------------------------------------------------
/core/src/main/java/com/crawljax/util/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Utility classes.
3 | */
4 | package com.crawljax.util;
5 |
6 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/browser/BrowserClosesDownloadPopUp.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.browser;
2 |
3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates;
4 | import static org.junit.Assert.assertThat;
5 |
6 | import org.eclipse.jetty.util.resource.Resource;
7 | import org.junit.Test;
8 |
9 | import com.crawljax.core.CrawlSession;
10 | import com.crawljax.test.BaseCrawler;
11 | import com.crawljax.test.BrowserTest;
12 | import org.junit.experimental.categories.Category;
13 |
14 | @Category(BrowserTest.class)
15 | public class BrowserClosesDownloadPopUp {
16 |
17 | @Test
18 | public void webBrowserWindowOpensItIsIgnored() {
19 | BaseCrawler crawler =
20 | new BaseCrawler(Resource.newClassPathResource("/site"), "download/download.html");
21 | CrawlSession crawl = crawler.crawl();
22 | assertThat(crawl.getStateFlowGraph(), hasStates(2));
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/browser/ChromeProxyConfig.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.browser;
2 |
3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates;
4 | import static org.junit.Assert.assertThat;
5 |
6 | import org.eclipse.jetty.util.resource.Resource;
7 | import org.junit.Test;
8 | import org.junit.experimental.categories.Category;
9 |
10 | import com.crawljax.browser.EmbeddedBrowser.BrowserType;
11 | import com.crawljax.core.CrawlSession;
12 | import com.crawljax.core.configuration.BrowserConfiguration;
13 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
14 | import com.crawljax.test.BaseCrawler;
15 | import com.crawljax.test.BrowserTest;
16 | import com.crawljax.test.Utils;
17 |
18 | @Category(BrowserTest.class)
19 | public class ChromeProxyConfig {
20 |
21 | @Test
22 | public void chromeProxyConfig() throws Exception {
23 | Utils.assumeBinary("webdriver.chrome.driver", "chromedriver");
24 |
25 | CrawlSession crawl =
26 | new BaseCrawler(Resource.newClassPathResource("/site"),
27 | "simplelink/simplelink.html") {
28 | @Override
29 | public CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
30 | CrawljaxConfigurationBuilder builder =
31 | super.newCrawlConfigurationBuilder();
32 | builder.setBrowserConfig(new BrowserConfiguration(BrowserType.CHROME));
33 | return builder;
34 | }
35 | }.crawl();
36 | assertThat(crawl.getStateFlowGraph(), hasStates(2));
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/condition/BrowserDoesntLeaveUrlTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.condition;
2 |
3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates;
4 | import static org.junit.Assert.assertThat;
5 |
6 | import org.junit.Test;
7 | import org.junit.experimental.categories.Category;
8 |
9 | import com.crawljax.core.CrawlSession;
10 | import com.crawljax.test.BaseCrawler;
11 | import com.crawljax.test.BrowserTest;
12 |
13 | @Category(BrowserTest.class)
14 | public class BrowserDoesntLeaveUrlTest {
15 |
16 | @Test
17 | public void whenJavaScriptNavigatesAwayFromPageItIsBlocked() throws Exception {
18 | BaseCrawler crawler = new BaseCrawler("navigate_other_urls.html");
19 | // crawler.showWebSite();
20 | CrawlSession session = crawler.crawl();
21 | assertThat(session.getStateFlowGraph(), hasStates(5));
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/ExitNotifierTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core;
2 |
3 | import static org.hamcrest.core.Is.is;
4 | import static org.junit.Assert.assertThat;
5 |
6 | import org.junit.Test;
7 |
8 | import com.crawljax.core.ExitNotifier.ExitStatus;
9 |
10 | public class ExitNotifierTest {
11 |
12 | private ExitNotifier notifier;
13 |
14 | @Test(timeout = 2000)
15 | public void whenMaximumStatesReachedItExists() throws InterruptedException {
16 | notifier = new ExitNotifier(2);
17 | notifier.incrementNumberOfStates();
18 | notifier.incrementNumberOfStates();
19 | ExitStatus reason = notifier.awaitTermination();
20 | assertThat(reason, is(ExitStatus.MAX_STATES));
21 |
22 | }
23 |
24 | @Test(timeout = 2000)
25 | public void whenNoStateLimitItDoesntTerminate() throws InterruptedException {
26 | notifier = new ExitNotifier(0);
27 | notifier.incrementNumberOfStates();
28 | notifier.incrementNumberOfStates();
29 | assertThat(notifier.isExitCalled(), is(false));
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/NestedFramesTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core;
2 |
3 | import com.crawljax.browser.BrowserProvider;
4 | import com.crawljax.test.BrowserTest;
5 | import com.crawljax.test.RunWithWebServer;
6 | import org.junit.ClassRule;
7 | import org.junit.Rule;
8 | import org.junit.Test;
9 | import org.junit.experimental.categories.Category;
10 | import org.openqa.selenium.By;
11 | import org.openqa.selenium.WebDriver;
12 | import org.openqa.selenium.WebElement;
13 |
14 | @Category(BrowserTest.class)
15 | public class NestedFramesTest {
16 |
17 | @ClassRule
18 | public static final RunWithWebServer SERVER = new RunWithWebServer("/site/iframe");
19 |
20 | private WebDriver driver;
21 |
22 | @Rule
23 | public BrowserProvider provider = new BrowserProvider();
24 |
25 | @Test
26 | public void testNestedFramesIndex() {
27 | driver = provider.newBrowser();
28 | driver.get(SERVER.getSiteUrl().toString());
29 |
30 | driver.switchTo().frame(0);
31 | driver.switchTo().frame(0);
32 |
33 | WebElement button002 = driver.findElement(By.id("button002"));
34 | button002.click();
35 | }
36 |
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/PopUpTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core;
2 |
3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasEdges;
4 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates;
5 | import static org.junit.Assert.assertThat;
6 |
7 | import java.util.concurrent.TimeUnit;
8 |
9 | import org.junit.ClassRule;
10 | import org.junit.Test;
11 | import org.junit.experimental.categories.Category;
12 |
13 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
14 | import com.crawljax.test.BrowserTest;
15 | import com.crawljax.test.RunWithWebServer;
16 |
17 | @Category(BrowserTest.class)
18 | public class PopUpTest {
19 |
20 | @ClassRule
21 | public static final RunWithWebServer WEB_SERVER = new RunWithWebServer("site");
22 |
23 | @Test
24 | public void testPopups() throws CrawljaxException {
25 | CrawljaxConfigurationBuilder builder = WEB_SERVER.newConfigBuilder("popup");
26 | builder.setMaximumDepth(3);
27 | builder.crawlRules().click("a");
28 | builder.crawlRules().waitAfterEvent(100, TimeUnit.MILLISECONDS);
29 | builder.crawlRules().waitAfterReloadUrl(100, TimeUnit.MILLISECONDS);
30 | CrawljaxRunner runner = new CrawljaxRunner(builder.build());
31 | CrawlSession session = runner.call();
32 | assertThat(session.getStateFlowGraph(), hasEdges(3));
33 | assertThat(session.getStateFlowGraph(), hasStates(4));
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/configuration/CrawlActionsTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import static com.crawljax.core.configuration.CrawlElementMatcher.withXpath;
4 | import static org.hamcrest.Matchers.containsInAnyOrder;
5 | import static org.hamcrest.collection.IsCollectionWithSize.hasSize;
6 | import static org.junit.Assert.assertThat;
7 |
8 | import java.util.List;
9 |
10 | import org.junit.Before;
11 | import org.junit.Test;
12 |
13 | public class CrawlActionsTest {
14 |
15 | private CrawlActionsBuilder actions;
16 |
17 | @Before
18 | public void setup() {
19 | actions = new CrawlActionsBuilder();
20 | }
21 |
22 | @Test
23 | public void testIncludesWork() {
24 | actions.click("a");
25 | actions.click("b").underXPath("123");
26 | actions.click("b").underXPath("sdfsdf");
27 | List crawlElements = actions.build().getLeft();
28 | assertThat(crawlElements, hasSize(3));
29 | }
30 |
31 | @Test
32 | public void testExcludesWork() {
33 | actions.dontClick("a");
34 | actions.dontClick("b").underXPath("123");
35 | actions.dontClick("b").underXPath("sdfsdf");
36 | List crawlElements = actions.build().getRight();
37 | assertThat(crawlElements, hasSize(3));
38 | }
39 |
40 | @Test
41 | @SuppressWarnings("unchecked")
42 | public void testExcludeParents() {
43 | actions.click("a");
44 | actions.click("button");
45 | actions.dontClickChildrenOf("b").withId("someId");
46 | actions.dontClickChildrenOf("b").withClass("someClass");
47 | List crawlElements = actions.build().getRight();
48 | assertThat(crawlElements, hasSize(4));
49 | assertThat(
50 | crawlElements,
51 | containsInAnyOrder(withXpath("//B[@id='someId']//*"),
52 | withXpath("//B[@id='someId']//*"),
53 | withXpath("//B[@class='someClass']//*"),
54 | withXpath("//B[@class='someClass']//*")));
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/configuration/CrawlElementMatcher.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import static org.hamcrest.core.IsEqual.equalTo;
4 |
5 | import org.hamcrest.Factory;
6 | import org.hamcrest.FeatureMatcher;
7 | import org.hamcrest.Matcher;
8 |
9 | public class CrawlElementMatcher {
10 |
11 | /**
12 | * @param xPath
13 | * checks {@link CrawlElement#getWithXpathExpression()}
14 | * @return A {@link Matcher} that inspects if the number of edges.
15 | */
16 | @Factory
17 | public static FeatureMatcher withXpath(String xPath) {
18 | return new FeatureMatcher(equalTo(xPath),
19 | "CrawlElement with xPath", "xPath") {
20 |
21 | @Override
22 | protected String featureValueOf(CrawlElement actual) {
23 | return actual.getWithXpathExpression();
24 | }
25 | };
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/configuration/DefaultCrawlScopeTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import static org.hamcrest.core.Is.is;
4 | import static org.junit.Assert.assertThat;
5 |
6 | import java.net.URI;
7 |
8 | import org.junit.Test;
9 |
10 | public class DefaultCrawlScopeTest {
11 |
12 | private static final URI SEED = URI.create("http://localhost/");
13 |
14 | @Test(expected = NullPointerException.class)
15 | public void nullSeedDomainIsNotAllowed() throws Exception {
16 | new DefaultCrawlScope((URI) null);
17 | }
18 |
19 | @Test
20 | public void defaultCrawlScopeShouldIncludeSeedDomain() throws Exception {
21 | CrawlScope defaultCrawlScope = new DefaultCrawlScope(SEED);
22 | assertThat(defaultCrawlScope.isInScope("http://localhost/in/scope"), is(true));
23 | }
24 |
25 | @Test
26 | public void defaultCrawlScopeShouldNotIncludeNonSeedDomain() throws Exception {
27 | CrawlScope defaultCrawlScope = new DefaultCrawlScope(SEED);
28 | assertThat(defaultCrawlScope.isInScope("http://example.com/not/in/scope"), is(false));
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/configuration/UnderXPathTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import static com.crawljax.browser.matchers.StateFlowGraphMatchers.hasStates;
4 | import static org.junit.Assert.assertThat;
5 |
6 | import org.junit.ClassRule;
7 | import org.junit.Test;
8 | import org.junit.experimental.categories.Category;
9 |
10 | import com.crawljax.core.CrawlSession;
11 | import com.crawljax.core.CrawljaxRunner;
12 | import com.crawljax.core.configuration.CrawlRules.CrawlRulesBuilder;
13 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
14 | import com.crawljax.test.BrowserTest;
15 | import com.crawljax.test.RunWithWebServer;
16 |
17 | /**
18 | * Test case for issue number 16: http://code.google.com/p/crawljax/issues/detail?id=16
19 | */
20 | @Category(BrowserTest.class)
21 | public class UnderXPathTest {
22 |
23 | @ClassRule
24 | public static final RunWithWebServer SERVER = new RunWithWebServer("/site");
25 |
26 | @Test
27 | public void testDontClickUnderXPath() throws Exception {
28 | CrawljaxConfigurationBuilder builder = SERVER.newConfigBuilder("underxpath.html");
29 | builder.crawlRules().click("a");
30 | builder.crawlRules().dontClick("a").underXPath("//A[@class=\"noClickClass\"]");
31 | CrawlRulesBuilder rules = builder.crawlRules();
32 | rules.dontClick("a").withAttribute("id", "noClickId");
33 | rules.dontClickChildrenOf("div").withClass("noChildrenOfClass");
34 | rules.dontClickChildrenOf("div").withId("noChildrenOfId");
35 |
36 | CrawlSession session = new CrawljaxRunner(builder.build()).call();
37 |
38 | assertThat(session.getStateFlowGraph(), hasStates(2));
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/configuration/XPathEscapeApostropheTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.configuration;
2 |
3 | import static org.junit.Assert.assertEquals;
4 |
5 | import org.junit.Before;
6 | import org.junit.Test;
7 |
8 | import com.crawljax.core.state.Eventable.EventType;
9 |
10 | public class XPathEscapeApostropheTest {
11 |
12 | private CrawlElement element;
13 |
14 | @Before
15 | public void setup() {
16 | element = new CrawlElement(EventType.click, "button");
17 | }
18 |
19 | @Test
20 | public void testStringNoApostrophes() {
21 | String test = "Test String";
22 | test = element.escapeApostrophes(test);
23 | assertEquals("'Test String'", test);
24 | }
25 |
26 | @Test
27 | public void testStringConcat() {
28 | String test = "I'm Feeling Lucky";
29 | test = element.escapeApostrophes(test);
30 | assertEquals("concat('I',\"'\",'m Feeling Lucky')", test);
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/largetests/LargeChromeTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.largetests;
2 |
3 | import com.crawljax.browser.EmbeddedBrowser.BrowserType;
4 | import com.crawljax.core.CrawlSession;
5 | import com.crawljax.core.configuration.BrowserConfiguration;
6 | import com.crawljax.test.BrowserTest;
7 | import com.crawljax.test.Utils;
8 |
9 | import org.junit.BeforeClass;
10 | import org.junit.experimental.categories.Category;
11 |
12 | @Category(BrowserTest.class)
13 | public class LargeChromeTest extends LargeTestBase {
14 |
15 | private static CrawlSession session;
16 |
17 | @BeforeClass
18 | public static void setUpBeforeClass() throws Exception {
19 | Utils.assumeBinary("webdriver.chrome.driver", "chromedriver");
20 |
21 | session = setup(new BrowserConfiguration(BrowserType.CHROME), 100, 100);
22 | }
23 |
24 | @Override
25 | protected CrawlSession getSession() {
26 | return session;
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/largetests/LargeFirefoxTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.largetests;
2 |
3 | import org.junit.BeforeClass;
4 | import org.junit.experimental.categories.Category;
5 |
6 | import com.crawljax.browser.EmbeddedBrowser.BrowserType;
7 | import com.crawljax.core.CrawlSession;
8 | import com.crawljax.core.configuration.BrowserConfiguration;
9 | import com.crawljax.test.BrowserTest;
10 | import com.crawljax.test.Utils;
11 |
12 | @Category(BrowserTest.class)
13 | public class LargeFirefoxTest extends LargeTestBase {
14 |
15 | private static CrawlSession session;
16 |
17 | @BeforeClass
18 | public static void setUpBeforeClass() throws Exception {
19 | Utils.assumeBinary("webdriver.gecko.driver", "geckodriver");
20 |
21 | session = setup(new BrowserConfiguration(BrowserType.FIREFOX, 1), 200, 200);
22 | }
23 |
24 | @Override
25 | protected CrawlSession getSession() {
26 | return session;
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/largetests/LargeIETest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.largetests;
2 |
3 | import static org.hamcrest.Matchers.containsString;
4 | import static org.junit.Assume.assumeThat;
5 |
6 | import org.junit.BeforeClass;
7 | import org.junit.experimental.categories.Category;
8 |
9 | import com.crawljax.browser.EmbeddedBrowser.BrowserType;
10 | import com.crawljax.core.CrawlSession;
11 | import com.crawljax.core.configuration.BrowserConfiguration;
12 | import com.crawljax.test.BrowserTest;
13 |
14 | @Category(BrowserTest.class)
15 | public class LargeIETest extends LargeTestBase {
16 |
17 | private static CrawlSession session;
18 |
19 | @BeforeClass
20 | public static void setUpBeforeClass() throws Exception {
21 | assumeThat(System.getProperty("os.name").toLowerCase(), containsString("windows"));
22 |
23 | session = setup(new BrowserConfiguration(BrowserType.INTERNET_EXPLORER), 400, 400);
24 | }
25 |
26 | @Override
27 | protected CrawlSession getSession() {
28 | return session;
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/largetests/LargePhantomJSTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.largetests;
2 |
3 | import com.crawljax.browser.EmbeddedBrowser.BrowserType;
4 | import com.crawljax.core.CrawlSession;
5 | import com.crawljax.core.configuration.BrowserConfiguration;
6 | import com.crawljax.test.BrowserTest;
7 | import com.crawljax.test.Utils;
8 |
9 | import org.junit.BeforeClass;
10 | import org.junit.experimental.categories.Category;
11 |
12 | @Category(BrowserTest.class)
13 | public class LargePhantomJSTest extends LargeTestBase {
14 |
15 | private static CrawlSession session;
16 |
17 | @BeforeClass
18 | public static void setUpBeforeClass() throws Exception {
19 | Utils.assumeBinary("phantomjs.binary.path", "phantomjs");
20 |
21 | session = setup(new BrowserConfiguration(BrowserType.PHANTOMJS, 1), 200, 200);
22 | }
23 |
24 | @Override
25 | protected CrawlSession getSession() {
26 | return session;
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/state/ElementTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.state;
2 |
3 | import static org.hamcrest.CoreMatchers.is;
4 | import static org.junit.Assert.assertThat;
5 |
6 | import java.io.IOException;
7 |
8 | import org.apache.commons.lang3.SerializationUtils;
9 | import org.junit.Test;
10 | import org.w3c.dom.Node;
11 | import org.xml.sax.SAXException;
12 |
13 | public class ElementTest {
14 |
15 | @Test
16 | public void testSerializability() throws SAXException, IOException {
17 | String HTML =
18 | " "
19 | + ""
20 | + "
";
22 | StateVertex sv = new StateVertexImpl(0, "test", HTML);
23 |
24 | Node node = sv.getDocument().getElementById("thea");
25 | Element element = new Element(node);
26 |
27 | byte[] serialized = SerializationUtils.serialize(element);
28 | Element deserializedElement = (Element) SerializationUtils.deserialize(serialized);
29 | assertThat(element, is(deserializedElement));
30 | assertThat(element.getElementId(), is(deserializedElement.getElementId()));
31 |
32 | }
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/state/PostCrawlStateGraphChecker.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.state;
2 |
3 | import static org.hamcrest.core.Is.is;
4 | import static org.hamcrest.core.IsNull.notNullValue;
5 | import static org.hamcrest.number.OrderingComparison.greaterThanOrEqualTo;
6 | import static org.junit.Assert.assertThat;
7 |
8 | import com.crawljax.core.CrawlSession;
9 | import com.crawljax.core.ExitNotifier.ExitStatus;
10 | import com.crawljax.core.plugin.PostCrawlingPlugin;
11 |
12 | /**
13 | * This {@link PostCrawlingPlugin} checks the {@link InMemoryStateFlowGraph} for consistency after
14 | * the crawl is done.
15 | */
16 | public class PostCrawlStateGraphChecker implements PostCrawlingPlugin {
17 |
18 | @Override
19 | public void postCrawling(CrawlSession session, ExitStatus status) {
20 | StateFlowGraph stateFlowGraph = session.getStateFlowGraph();
21 |
22 | allStatesHaveOneOreMoreIncomingEdges(stateFlowGraph);
23 |
24 | allEdgesConnectTwoStates(stateFlowGraph);
25 | }
26 |
27 | private void allStatesHaveOneOreMoreIncomingEdges(StateFlowGraph stateFlowGraph) {
28 | for (StateVertex state : stateFlowGraph.getAllStates()) {
29 | if (stateFlowGraph.getInitialState().getId() != state.getId()) {
30 | assertThat(stateFlowGraph.getIncomingClickable(state).size(),
31 | is(greaterThanOrEqualTo(1)));
32 | }
33 | }
34 | }
35 |
36 | private void allEdgesConnectTwoStates(StateFlowGraph stateFlowGraph) {
37 | for (Eventable eventable : stateFlowGraph.getAllEdges()) {
38 | assertThat(eventable.getSourceStateVertex(), is(notNullValue()));
39 | assertThat(eventable.getTargetStateVertex(), is(notNullValue()));
40 | }
41 | }
42 |
43 | @Override
44 | public String toString() {
45 | return this.getClass().getSimpleName();
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/core/state/StatesContainElementsTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.core.state;
2 |
3 | import static org.hamcrest.collection.IsEmptyCollection.empty;
4 | import static org.hamcrest.core.Is.is;
5 | import static org.hamcrest.core.IsNot.not;
6 | import static org.junit.Assert.assertThat;
7 |
8 | import java.util.Set;
9 |
10 | import org.eclipse.jetty.util.resource.Resource;
11 | import org.junit.Before;
12 | import org.junit.Test;
13 | import org.junit.experimental.categories.Category;
14 |
15 | import com.crawljax.core.CrawlSession;
16 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
17 | import com.crawljax.test.BaseCrawler;
18 | import com.crawljax.test.BrowserTest;
19 |
20 | @Category(BrowserTest.class)
21 | public class StatesContainElementsTest {
22 |
23 | private CrawlSession crawl;
24 |
25 | @Before
26 | public void setup() {
27 | crawl = new BaseCrawler(Resource.newClassPathResource("demo-site")) {
28 | @Override
29 | protected CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
30 | return super.newCrawlConfigurationBuilder()
31 | .setMaximumStates(2);
32 | }
33 | }.crawl();
34 | }
35 |
36 | @Test
37 | public void whenCrawledTheStateVertexesContainEvents() {
38 | Set allStates = crawl.getStateFlowGraph().getAllStates();
39 | for (StateVertex stateVertex : allStates) {
40 | if ("index".equals(stateVertex.getName())) {
41 | assertThat(stateVertex.getCandidateElements(), is(not(empty())));
42 | }
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/crawls/CrawlWithCustomScopeTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.crawls;
2 |
3 | import static org.hamcrest.core.Is.is;
4 | import static org.hamcrest.core.IsCollectionContaining.hasItems;
5 | import static org.junit.Assert.assertThat;
6 |
7 | import java.net.URI;
8 | import java.util.HashSet;
9 | import java.util.Set;
10 |
11 | import org.junit.Test;
12 | import org.junit.experimental.categories.Category;
13 |
14 | import com.crawljax.core.CrawlSession;
15 | import com.crawljax.core.configuration.CrawlScope;
16 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
17 | import com.crawljax.core.state.StateVertex;
18 | import com.crawljax.test.BaseCrawler;
19 | import com.crawljax.test.BrowserTest;
20 |
21 | @Category(BrowserTest.class)
22 | public class CrawlWithCustomScopeTest {
23 |
24 | @Test
25 | public void crawlsPagesOnlyInCustomScope() throws Exception {
26 | CrawlScope crawlScope =
27 | url -> url.contains("in_scope") || url.endsWith("crawlscope/index.html");
28 | BaseCrawler baseCrawler = new BaseCrawler("crawlscope") {
29 | @Override
30 | public CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
31 | CrawljaxConfigurationBuilder builder =
32 | super.newCrawlConfigurationBuilder();
33 | builder.setCrawlScope(crawlScope);
34 | return builder;
35 | }
36 | };
37 |
38 | CrawlSession crawlSession = baseCrawler.crawl();
39 |
40 | URI baseUrl = baseCrawler.getWebServer().getSiteUrl();
41 | Set crawledUrls = new HashSet<>();
42 | for (StateVertex state : crawlSession.getStateFlowGraph().getAllStates()) {
43 | crawledUrls.add(state.getUrl());
44 | }
45 |
46 | assertThat(crawledUrls, hasItems(
47 | baseUrl + "crawlscope",
48 | baseUrl + "crawlscope/in_scope.html",
49 | baseUrl + "crawlscope/in_scope_inner.html"));
50 | assertThat(crawledUrls.size(), is(3));
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/forms/RandomInputValueGeneratorTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.forms;
2 |
3 | import static org.hamcrest.core.Is.is;
4 | import static org.junit.Assert.assertThat;
5 |
6 | import java.util.HashSet;
7 | import java.util.Set;
8 |
9 | import org.junit.Before;
10 | import org.junit.Test;
11 |
12 | import com.google.common.collect.Lists;
13 |
14 | public class RandomInputValueGeneratorTest {
15 | private static final int NUM_RAND_CHECKS = 1000;
16 | private static final int LENGTH_SHORT = 1;
17 | private static final int LENGTH_MEDIUM = 15;
18 | private static final int LENGTH_LONG = 150;
19 |
20 | private RandomInputValueGenerator generator;
21 |
22 | @Before
23 | public void setup() {
24 | generator = new RandomInputValueGenerator();
25 | }
26 |
27 | @Test
28 | public void randomValuesAreUnique() {
29 | Set set = new HashSet<>();
30 | for (int i = 0; i < NUM_RAND_CHECKS; i++) {
31 | assertThat(set.add(generator.getRandomString(LENGTH_MEDIUM)), is(true));
32 | }
33 | }
34 |
35 | @Test
36 | public void testLengthSpecification() {
37 | assertThat(generator.getRandomString(LENGTH_SHORT).length(), is(LENGTH_SHORT));
38 | assertThat(generator.getRandomString(LENGTH_MEDIUM).length(), is(LENGTH_MEDIUM));
39 | assertThat(generator.getRandomString(LENGTH_LONG).length(), is(LENGTH_LONG));
40 | }
41 |
42 | @Test(expected = IllegalArgumentException.class)
43 | public void randomOptionDoesntAcceptEmptyLists() {
44 | generator.getRandomItem(Lists.newArrayList());
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/test/BrowserTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.test;
2 |
/**
 * Marker interface for tests that need a browser to run. It declares no members; test classes
 * reference it through JUnit's {@code @Category(BrowserTest.class)} annotation.
 */
public interface BrowserTest {

}
9 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/test/Utils.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.test;
2 |
3 | import static org.hamcrest.core.Is.is;
4 | import static org.junit.Assume.assumeThat;
5 |
6 | import java.io.IOException;
7 |
8 | import org.slf4j.Logger;
9 | import org.slf4j.LoggerFactory;
10 |
11 | /**
12 | * Utilities for tests.
13 | */
14 | public final class Utils {
15 |
16 | private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
17 |
18 | private Utils() {
19 | }
20 |
21 | public static void assumeBinary(String systemProperty, String binaryName)
22 | throws Exception {
23 | assumeThat(System.getProperty(systemProperty) != null
24 | || isOnClassPath(binaryName), is(true));
25 | }
26 |
27 | private static boolean isOnClassPath(String binaryName)
28 | throws IOException, InterruptedException {
29 | try {
30 | if (!System.getProperty("os.name").startsWith("Windows")) {
31 | Process exec = Runtime.getRuntime().exec("which " + binaryName);
32 | boolean found = exec.waitFor() == 0;
33 | LOG.info("Found {} on the classpath = {}", binaryName, found);
34 | return found;
35 | }
36 | } catch (RuntimeException e) {
37 | LOG.info("Could not determine if {} is on the classpath: {}", binaryName,
38 | e.getMessage());
39 | }
40 | return false;
41 | }
42 | }
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/test/WebServer.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.test;
2 |
3 | import java.net.URI;
4 |
5 | import com.google.common.base.Preconditions;
6 | import org.eclipse.jetty.server.Server;
7 | import org.eclipse.jetty.server.ServerConnector;
8 | import org.eclipse.jetty.server.handler.ResourceHandler;
9 | import org.eclipse.jetty.util.resource.Resource;
10 |
11 | /**
12 |  * Small embedded Jetty server that serves one base {@link Resource} over HTTP on localhost.
13 |  *
14 |  * {@link #start()} must be called before {@link #getSiteUrl()}, {@link #getPort()},
15 |  * {@link #stop()} or {@link #join()}; until then those methods throw
16 |  * {@link IllegalStateException}.
17 |  */
18 | public class WebServer {
19 |     private final Resource resource;
20 |
21 |     private int port;
22 |     private URI demoSite;
23 |     private Server server;
24 |     private boolean started;
25 |
26 |     /**
27 |      * @param classPathResource
28 |      *            The resource that is used as the base resource of the served site.
29 |      */
30 |     public WebServer(Resource classPathResource) {
31 |         resource = classPathResource;
32 |     }
33 |
34 |     /**
35 |      * Starts the server and records the port and site URL it is reachable on.
36 |      *
37 |      * @throws Exception
38 |      *             when the underlying Jetty server fails to start.
39 |      */
40 |     public void start() throws Exception {
41 |         // Port 0 is requested; the port that was really bound is read back after start-up.
42 |         server = new Server(0);
43 |         ResourceHandler handler = new ResourceHandler();
44 |         handler.setBaseResource(resource);
45 |         server.setHandler(handler);
46 |         server.start();
47 |         this.port = ((ServerConnector) server.getConnectors()[0]).getLocalPort();
48 |         this.demoSite = URI.create("http://localhost:" + port + "/");
49 |         this.started = true;
50 |     }
51 |
52 |     /**
53 |      * @return the root URL of the served site.
54 |      * @throws IllegalStateException
55 |      *             when the server has not been started.
56 |      */
57 |     public URI getSiteUrl() {
58 |         checkServerStarted();
59 |         return demoSite;
60 |     }
61 |
62 |     /**
63 |      * @return the local port the server is bound to.
64 |      * @throws IllegalStateException
65 |      *             when the server has not been started.
66 |      */
67 |     public int getPort() {
68 |         checkServerStarted();
69 |         return port;
70 |     }
71 |
72 |     /**
73 |      * Stops the server.
74 |      *
75 |      * @throws IllegalStateException
76 |      *             when the server has not been started.
77 |      * @throws RuntimeException
78 |      *             wrapping any exception raised while stopping.
79 |      */
80 |     public void stop() {
81 |         checkServerStarted();
82 |         try {
83 |             server.stop();
84 |         } catch (Exception e) {
85 |             throw new RuntimeException("Could not stop the server", e);
86 |         }
87 |     }
88 |
89 |     private void checkServerStarted() {
90 |         Preconditions.checkState(started, "Server not started");
91 |     }
92 |
93 |     /**
94 |      * Blocks the calling thread by delegating to {@link Server#join()}.
95 |      *
96 |      * @throws IllegalStateException
97 |      *             when the server has not been started.
98 |      * @throws InterruptedException
99 |      *             when the waiting thread is interrupted.
100 |      */
101 |     public void join() throws InterruptedException {
102 |         // Without this guard an un-started server failed with a bare NullPointerException.
103 |         checkServerStarted();
104 |         server.join();
105 |     }
106 | }
65 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/test/WebServerTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.test;
2 |
3 |
4 | import static org.hamcrest.Matchers.*;
5 | import static org.junit.Assert.assertThat;
6 |
7 | import java.io.IOException;
8 | import java.net.URI;
9 |
10 | import com.crawljax.core.CrawljaxException;
11 | import org.eclipse.jetty.util.resource.Resource;
12 | import org.junit.After;
13 | import org.junit.Before;
14 | import org.junit.Test;
15 |
16 | public class WebServerTest {
17 | private URI site;
18 | private WebServer server;
19 |
20 | private static final int MAX_PORT = 65535;
21 | private static final int MIN_PORT = 0;
22 |
23 | @Before
24 | public void setup() throws Exception {
25 | site = BaseCrawler.class.getResource("/site").toURI();
26 | try {
27 | server = new WebServer(Resource.newResource(site));
28 | }
29 | catch (IOException e) {
30 | throw new CrawljaxException("Could not load resource", e);
31 | }
32 | server.start();
33 | }
34 |
35 | @After
36 | public void stopServer() {
37 | server.stop();
38 | }
39 |
40 | @Test
41 | public void testPort() throws Exception {
42 | assertThat(server.getPort(), is(lessThanOrEqualTo(MAX_PORT)));
43 | assertThat(server.getPort(), is(greaterThanOrEqualTo(MIN_PORT)));
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/test/matchers/FileMatcher.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.test.matchers;
2 |
3 | import java.io.File;
4 |
5 | import org.hamcrest.Description;
6 | import org.hamcrest.Factory;
7 | import org.hamcrest.Matcher;
8 | import org.hamcrest.TypeSafeMatcher;
9 |
10 | /**
11 |  * Hamcrest matcher that accepts a {@link File} when it exists on disk.
12 |  */
13 | public class FileMatcher extends TypeSafeMatcher<File> {
14 |
15 |     /**
16 |      * @param file
17 |      *            the file under test.
18 |      * @return {@code true} when {@link File#exists()} reports that the file exists.
19 |      */
20 |     @Override
21 |     public boolean matchesSafely(File file) {
22 |         return file.exists();
23 |     }
24 |
25 |     @Override
26 |     public void describeTo(Description description) {
27 |         description.appendText("file which exists");
28 |     }
29 |
30 |     /**
31 |      * @return a matcher that passes for files that exist.
32 |      */
33 |     @Factory
34 |     public static Matcher<File> exists() {
35 |         return new FileMatcher();
36 |     }
37 |
38 | }
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/util/DOMComparerTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.util;
2 |
3 | import static org.hamcrest.core.Is.is;
4 | import static org.junit.Assert.assertEquals;
5 | import static org.junit.Assert.assertNotNull;
6 | import static org.junit.Assert.assertThat;
7 |
8 | import java.io.IOException;
9 | import java.util.List;
10 |
11 | import org.custommonkey.xmlunit.Difference;
12 | import org.hamcrest.collection.IsEmptyCollection;
13 | import org.junit.Test;
14 | import org.w3c.dom.Document;
15 |
16 | /**
17 | * Test the comparisons between two Documents.
18 | *
19 | * @author Singla
20 | */
21 |
22 | public class DOMComparerTest {
23 |
24 | @Test
25 | public void compareNoDifference() throws IOException {
26 | String html = "No difference
";
27 |
28 | Document control = DomUtils.asDocument(html);
29 | assertNotNull(control);
30 |
31 | Document test = DomUtils.asDocument(html);
32 | assertNotNull(test);
33 |
34 | DOMComparer dc = new DOMComparer(control, test);
35 |
36 | List differences = dc.compare();
37 | assertThat(differences, is(IsEmptyCollection.empty()));
38 | }
39 |
40 | @Test
41 | public void comparePartialDifference() throws IOException {
42 | String controlHTML =
43 | "There are differences
";
44 | String testHTML =
45 | "Crawljax There are differences.";
46 | final int EXPECTED_DIFF = 7;
47 |
48 | Document control = DomUtils.asDocument(controlHTML);
49 | assertNotNull(control);
50 |
51 | Document test = DomUtils.asDocument(testHTML);
52 | assertNotNull(test);
53 |
54 | DOMComparer dc = new DOMComparer(control, test);
55 |
56 | List differences = dc.compare();
57 | assertEquals("Error: Did not find 5 differences", differences.size(), EXPECTED_DIFF);
58 |
59 | }
60 |
61 | }
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/util/DomUtilsBrowserTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.util;
2 |
3 | import static org.junit.Assert.assertNotNull;
4 |
5 | import java.io.IOException;
6 | import java.net.URISyntaxException;
7 | import java.net.URL;
8 |
9 | import com.crawljax.browser.BrowserProvider;
10 | import com.crawljax.browser.EmbeddedBrowser;
11 | import com.crawljax.test.BrowserTest;
12 | import org.junit.Before;
13 | import org.junit.Rule;
14 | import org.junit.Test;
15 | import org.junit.experimental.categories.Category;
16 | import org.w3c.dom.Document;
17 | import org.xml.sax.SAXException;
18 |
19 | /**
20 |  * Browser-backed test: the stripped DOM obtained from a live browser must be parseable by
21 |  * {@link DomUtils#asDocument(String)}.
22 |  */
23 | @Category(BrowserTest.class)
24 | public class DomUtilsBrowserTest {
25 |
26 |     @Rule
27 |     public BrowserProvider provider = new BrowserProvider();
28 |
29 |     private EmbeddedBrowser browser;
30 |
31 |     /** Opens a new browser on the bundled {@code /site/index.html} page. */
32 |     @Before
33 |     public void before() throws URISyntaxException {
34 |         browser = provider.newEmbeddedBrowser();
35 |         URL indexPage = DomUtilsBrowserTest.class.getResource("/site/index.html");
36 |         browser.goToUrl(indexPage.toURI());
37 |     }
38 |
39 |     /**
40 |      * Reads the stripped DOM from the browser and converts it into a {@link Document}.
41 |      */
42 |     @Test
43 |     public void testGetDocumentFromBrowser() throws SAXException, IOException {
44 |         String strippedDom = browser.getStrippedDom();
45 |         assertNotNull(strippedDom);
46 |
47 |         Document parsed = DomUtils.asDocument(strippedDom);
48 |         assertNotNull(parsed);
49 |
50 |         browser.close();
51 |     }
52 |
53 | }
53 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/util/HtmlNamespaceTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.util;
2 |
3 | import static org.junit.Assert.assertEquals;
4 | import static org.junit.Assert.assertTrue;
5 |
6 | import javax.xml.XMLConstants;
7 |
8 | import org.junit.Test;
9 |
10 | /**
11 |  * Checks the prefix-to-URI mapping exposed by {@code HtmlNamespace.getNamespaceURI}.
12 |  */
13 | public class HtmlNamespaceTest {
14 |
15 |     @Test
16 |     public void testgetNamespaceURI() {
17 |         HtmlNamespace testNamespace = new HtmlNamespace();
18 |
19 |         // A null prefix has to be rejected with a NullPointerException.
20 |         String nullPrefix = null;
21 |         boolean rejectedNull = false;
22 |         try {
23 |             testNamespace.getNamespaceURI(nullPrefix);
24 |         } catch (NullPointerException expected) {
25 |             rejectedNull = true;
26 |         }
27 |         assertTrue("Expected a NullPointerException for a null prefix", rejectedNull);
28 |
29 |         // The expected value is a namespace URI, so it is spelled with the URI constant;
30 |         // NULL_NS_URI has the same value ("") as the DEFAULT_NS_PREFIX used before.
31 |         assertEquals(XMLConstants.NULL_NS_URI, testNamespace.getNamespaceURI("gibberish"));
32 |
33 |         assertEquals("http://www.w3.org/1999/xhtml", testNamespace.getNamespaceURI("html"));
34 |
35 |         assertEquals(XMLConstants.XML_NS_URI, testNamespace.getNamespaceURI("xml"));
36 |     }
37 |
38 | }
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/util/XMLObjectTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.util;
2 |
3 | import static org.junit.Assert.assertEquals;
4 | import static org.junit.Assert.assertTrue;
5 | import static org.junit.Assert.fail;
6 |
7 | import java.io.File;
8 | import java.io.FileNotFoundException;
9 | import java.util.ArrayList;
10 |
11 | import org.junit.Test;
12 |
13 | /**
14 |  * Round-trips an object through {@code XMLObject.objectToXML} and {@code XMLObject.xmlToObject}.
15 |  */
16 | public class XMLObjectTest {
17 |
18 |     private static final String FILE_NAME = "xmlobject-save-to-file-test.xml";
19 |
20 |     /**
21 |      * Saves a list to {@link #FILE_NAME}, reads it back and compares the content.
22 |      *
23 |      * @throws FileNotFoundException
24 |      *             when the file cannot be written or read; propagated so the test report keeps
25 |      *             the original cause.
26 |      */
27 |     @SuppressWarnings("unchecked")
28 |     @Test
29 |     public void saveToFile() throws FileNotFoundException {
30 |         ArrayList<String> original = new ArrayList<String>();
31 |         original.add("Bla");
32 |         original.add("Something else");
33 |
34 |         File f = new File(FILE_NAME);
35 |         try {
36 |             XMLObject.objectToXML(original, FILE_NAME);
37 |             assertTrue(f.exists());
38 |
39 |             ArrayList<String> restored = (ArrayList<String>) XMLObject.xmlToObject(FILE_NAME);
40 |
41 |             assertEquals(2, restored.size());
42 |             assertEquals("Bla", restored.get(0));
43 |             assertEquals("Something else", restored.get(1));
44 |         } finally {
45 |             // Clean up even when an assertion above fails, so no file is left in the
46 |             // working directory.
47 |             f.delete();
48 |         }
49 |         assertTrue(!f.exists());
50 |     }
51 | }
46 |
--------------------------------------------------------------------------------
/core/src/test/java/com/crawljax/util/XmlunitDifferenceTest.java:
--------------------------------------------------------------------------------
1 | // Copyright 2010 Google Inc. All Rights Reserved.
2 |
3 | package com.crawljax.util;
4 |
5 | import java.util.List;
6 |
7 | import org.custommonkey.xmlunit.Difference;
8 | import org.junit.Assert;
9 | import org.junit.Test;
10 |
11 | import com.google.common.collect.Lists;
12 |
13 | /**
14 |  * Tests the usage of {@code DomUtils.getDifferences} on pairs of DOM strings for which no
15 |  * differences are expected.
16 |  *
17 |  * @author slenselink@google.com (Stefan Lenselink)
18 |  */
19 | public class XmlunitDifferenceTest {
20 |
21 |     @Test
22 |     public void testEmptyDoms() {
23 |         assertNoDifferences(DomUtils.getDifferences("", ""));
24 |     }
25 |
26 |     @Test
27 |     public void testSameIdenticalDoms() {
28 |         assertNoDifferences(DomUtils.getDifferences(" ", " "));
29 |     }
30 |
31 |     @Test
32 |     public void testSameDomsArrtibutesSame() {
33 |         assertNoDifferences(DomUtils.getDifferences(" ", " "));
34 |     }
35 |
36 |     @Test
37 |     public void testSameDomsArrtibutesFiltered() {
38 |         assertNoDifferences(DomUtils.getDifferences(" ", " ", Lists.newArrayList("value")));
39 |     }
40 |
41 |     /** Asserts that the given difference list is empty. */
42 |     private static void assertNoDifferences(List<?> differences) {
43 |         Assert.assertEquals(0, differences.size());
44 |     }
45 |
46 | }
53 |
--------------------------------------------------------------------------------
/core/src/test/resources/candidateElementExtractorTest/domWithFourTypeDownloadLink.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Simple page
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/core/src/test/resources/candidateElementExtractorTest/domWithOneExternalAndTwoInternal.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/core/src/test/resources/configuration/crawljax.properties:
--------------------------------------------------------------------------------
1 | ### select the embedded browser:
2 | ### webdriver.ie (Note: supports only onclick)
3 | ### webdriver.firefox (Note: supports only onclick)
4 | browser = webdriver.firefox
5 |
6 | ### The URL of the site to be crawled
7 | site.url = http://spci.st.ewi.tudelft.nl/demo/wishlist/
8 |
9 | ### The events generated on DOM elements: onclick, onmouseover, onblur, onkeydown...
10 | robot.events = onclick
11 |
12 | ### The tag elements that build up the candidate elements:
13 | ### FORMAT: tag-name:{(attr-name=attr-value+;? ?)*}
14 | ### example: a:{attr=value}, div:{class=aha; id=room}, span:{}
15 | crawl.tags = a:{class=remotetop}
16 |
17 | ### The tag elements that should be excluded from crawling
18 | crawl.tags.exclude = a:{class=remoteleft}
19 |
20 | # click each element only once, 1 means click only once.
21 | # 0 means click on every new state.
22 | click.once = 1
23 |
24 | ### The depth level of the breadth-first search
25 | crawl.depth = 5
26 |
27 | ### The number of ms to wait for initialization
28 | crawl.wait.reload = 400
29 |
30 | ### The number of ms (e.g., 1000) to wait after an event
31 | crawl.wait.event = 400
32 |
33 | ### max number of states - set 0 to ignore
34 | crawl.max.states = 0
35 |
36 | ### max crawling time (seconds) - set 0 to ignore
37 | crawl.max.runtime = 0
38 |
39 | ### enter/choose random input values for form elements - set 1 to enable
40 | crawl.forms.randominput = 1
41 |
42 | ### number of threads used for crawling (support for multi-threaded crawling)
43 | crawl.numberOfThreads = 1
44 |
45 | ### The attributes (in regular expression) which should be stripped from the DOM before it is used.
46 | crawl.filter.attributes = closure_hashcode_(\\w)*, jquery[0-9]+
47 |
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/conditions.html:
--------------------------------------------------------------------------------
1 | Conditions
2 |
3 | TODO
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/crawlConditions.html:
--------------------------------------------------------------------------------
1 | CrawlConditions
2 |
3 | Crawljax has CrawlConditions which can be used to guide the crawling with a dynamic approach. When Crawljax finds a new state, it only crawls it when all the CrawlConditions are satisfied. If no CrawlConditions are specified, all the states are crawled.
4 |
5 | Methods in CrawlSpecification
6 |
7 |
8 | addCrawlCondition(String description, Condition crawlCondition)
9 | addCrawlCondition(String description, Condition crawlCondition, Condition... preConditions)
10 |
11 |
12 |
13 | Example 1
14 | Crawljax should only crawl pages with the text foo in the URL.
15 |
16 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
17 | ...
18 | UrlCondition onlyFooDomain = new UrlCondition("foo");
19 | builder.addCrawlCondition("Only crawl foo site", onlyFooDomain);
20 |
21 |
22 | Example 2
23 | Crawljax should never crawl a page with a span with the class 'foo' .
24 |
25 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
26 | ...
27 | NotXPathCondition noFooClass = new NotXPathCondition("//SPAN[@class='foo']");
28 | builder.addCrawlCondition("No spans with foo as class", noFooClass);
29 |
30 |
31 | This page contains a foo and should therefore not be crawled by Crawljax. Thus this link should not be clicked by Crawljax.
32 |
33 |
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/customInput.html:
--------------------------------------------------------------------------------
1 | Forms
2 | Crawljax can fill in custom values in input elements
3 |
4 | Contact
5 |
17 |
18 | Save
19 | Cancel
20 |
21 |
22 |
23 | Example Code
24 |
25 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
26 | ...
27 | InputSpecification input = new InputSpecification();
28 | input.field("male").setValue(true);
29 | input.field("name").setValue("Bob");
30 | input.fields("phone", "mobile").setValue("1234567890");
31 | input.field("type").setValue("Student");
32 | input.field("active").setValue(true);
33 | ...
34 | builder.setInputSpecification(input);
35 |
36 |
37 |
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/forms.html:
--------------------------------------------------------------------------------
1 | Forms
2 | Crawljax can fill in multiple values in forms
3 |
4 | Contact
5 |
16 |
17 | Save
18 | Cancel
19 |
20 |
21 |
22 | Example Code
23 |
24 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
25 | ...
26 | InputSpecification input = new InputSpecification();
27 | Form contactForm = new Form();
28 | contactForm.field("male").setValues(true, false);
29 | contactForm.field("female").setValues(false, true);
30 | contactForm.field("name").setValues("Bob", "Alice", "John");
31 | contactForm.field("phone").setValues("1234567890", "1234888888", "");
32 | contactForm.field("type").setValues("Student", "Teacher");
33 | contactForm.field("active").setValues(true);
34 | input.setValuesInForm(contactForm).beforeClickElement("button").withText("Save");
35 | ...
36 | builder.setInputSpecification(input);
37 |
38 |
39 |
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/img/nav.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zaproxy/crawljax/4ac51f5f16bfab25edcc40abeb0b0233aa5798bc/core/src/test/resources/demo-site/img/nav.gif
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/info.html:
--------------------------------------------------------------------------------
1 | Information
2 |
3 | Crawling Ajax applications through dynamic analysis and reconstruction of the UI state changes. Crawljax is based on a method which dynamically builds a `state-flow graph' modeling the various navigation paths and states within an Ajax application.
4 | See more here
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/invariants.html:
--------------------------------------------------------------------------------
1 | Invariants
2 | Invariants can be used to perform tests on the current state
3 |
4 | Methods in CrawlSpecification
5 |
6 |
7 | addInvariant(String description, Condition condition)
8 | addInvariant(String description, Condition condition, Condition...preConditions)
9 |
10 |
11 |
12 | Example 1
13 | Use the generic conditions
14 |
15 | addInvariant("No error messages", new NotRegexCondition("Error [0-9]+"));
16 |
17 |
18 | Example 2
19 | Create your own condition
20 |
21 | crawler.addInvariant("Test count myList", new ConditionAbstract(){
22 |
23 | @Override
24 | public boolean check(EmbeddedBrowser browser) {
25 | WebDriver driver = browser.getDriver();
26 | try{
27 | WebElement myList = driver.findElement(By.id("myList"));
28 | return new Select(myList).getOptions().size() > 0;
29 | }catch(NoSuchElementException e){
30 | //not found
31 | return true;
32 | }
33 | }
34 | });
35 |
36 |
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/js/general.js:
--------------------------------------------------------------------------------
1 | $(document).ready(function(){
2 | openPage("home.html");
3 | setEventHandlers();
4 | //addError("how rude");
5 | //addError("how kewl");
6 | });
7 |
8 | function setEventHandlers(){
9 | $('#info').click(function(){ openPage("info.html") } );
10 | $('#papers').click(function(){ openPage("papers.html") } );
11 | $('#home').click(function(){ openPage("home.html") } );
12 | $('#ignore').click(function(){ addError("Crawljax should ignore me") } );
13 | }
14 |
15 | function openPage(page){
16 | $('#content').load(page);
17 | }
18 |
19 | function addError(msg){
20 | $('#errors').show();
21 | $('#errors').html($('#errors').html() + "ERROR: " + msg + "
");
22 | }
23 |
24 |
25 |
26 | function saveContact(msg){
27 | var content = "" + msg + " ";
28 | var gender = (document.getElementById("male").checked ? "male" : "female");
29 | content += "Gender: " + gender + " ";
30 | content += "Name: " + $('#name').val() + " ";
31 | content += "Phone: " + $('#phone').val() + " ";
32 | content += "Mobile: " + $('#mobile').val() + " ";
33 | content += "Type: " + document.getElementById("type").options[document.getElementById("type").selectedIndex].value + " ";
34 | content += "Active: " + document.getElementById("active").checked;
35 | content += " ";
36 | $('#content').html(content);
37 | }
38 |
39 | function afterRandomInput(){
40 | var content = "filled in random values ";
41 | content += "text: " + document.getElementById("text").value + " ";
42 | content += "checkbox: " + document.getElementById("checkbox").checked + " ";
43 | content += "radio: " + document.getElementById("radio").checked + " ";
44 | content += "Select: " + document.getElementById("select").value;
45 | content += " ";
46 |
47 | $('#content').html(content);
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/oracleComparators.html:
--------------------------------------------------------------------------------
1 | Oracle Comparators
2 |
3 | Methods in CrawlSpecification
4 |
5 |
6 | addOracleComparator(String id, Oracle oracleComparator)
7 | addOracleComparator(String id, Oracle oracleComparator, Condition...preConditions)
8 |
9 | The argument id is used to identify the oracle comparators easily. For example, there could be multiple AttributeOracle comparators.
10 |
11 |
12 | Example
13 | Example of an Oracle Comparator
14 |
15 | public class IgnoreCasingOracle extends OracleAbstract {
16 |
17 | @Override
18 | public boolean isEquivalent() {
19 | return getOriginalDom().equalsIgnoreCase(getNewDom());
20 | }
21 | }
22 |
23 | Adding to the CrawlSpecification
24 |
25 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
26 | ...
27 | UrlCondition onFooPage = new UrlCondition("#foo");
28 | builder.addOracleComparator("Ignore casing on foo pages", new IgnoreCasingOracle(), onFooPage);
29 |
30 |
31 | Oracle Comparator Pipelining
32 | TODO
33 |
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/papers.html:
--------------------------------------------------------------------------------
1 | Papers
2 | The following papers related to Crawljax are published:
3 |
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/randomInput.html:
--------------------------------------------------------------------------------
1 | Random Input
2 |
3 | Crawljax enters random input while crawling if no input values are specified
4 |
5 | When Crawljax finds form input elements in a state, it sets a random value in the elements:
6 |
7 |
40 |
41 |
42 |
Next
--------------------------------------------------------------------------------
/core/src/test/resources/demo-site/waitConditions.html:
--------------------------------------------------------------------------------
1 | WaitConditions
2 |
3 | Websites can have widgets that load relatively slowly, which can cause flakiness while crawling or testing. With WaitConditions the user can specify for which URL (or part of the URL) Crawljax should wait for certain elements or widgets to become visible.
4 |
5 | Methods in CrawlSpecification
6 |
7 |
8 | waitFor(String url, ExpectedCondition...expectedConditions)
9 | waitFor(String url, int timeout, ExpectedCondition...expectedConditions)
10 |
11 |
12 |
13 |
14 | Example
15 | Wait for a list to be loaded on the #contact page.
16 |
17 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
18 | ...
19 | builder.waitFor("#contact", new ExpectedVisibleCondition(By.id("contactList")));
20 |
21 |
22 | Note: currently only supported when WebDriver is used
23 |
--------------------------------------------------------------------------------
/core/src/test/resources/logback-test.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/core/src/test/resources/realm.properties:
--------------------------------------------------------------------------------
1 | test: MD5:438E713A9EE9256F50816C1DACECCD9C, user
--------------------------------------------------------------------------------
/core/src/test/resources/site/concurrentcrawl1/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Index ABC
5 |
6 |
7 | Index ABC
8 | Pages ABC
9 | A
10 | B
11 | C
12 |
13 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/concurrentcrawl1/page_a.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | A
5 |
6 |
7 | Page A
8 | Page A.
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/concurrentcrawl1/page_b.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | B
5 |
6 |
7 | Page B
8 | Page B.
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/concurrentcrawl1/page_c.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | C
5 |
6 |
7 | Page C
8 | Page C.
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/concurrentcrawl2/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Index XYZ
5 |
6 |
7 | Index XYZ
8 | Pages XYZ
9 | X
10 | Y
11 | Z
12 |
13 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/concurrentcrawl2/page_x.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | X
5 |
6 |
7 | Page X
8 | Page X.
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/concurrentcrawl2/page_y.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Y
5 |
6 |
7 | Page Y
8 | Page Y.
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/concurrentcrawl2/page_z.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Z
5 |
6 |
7 | Page Z
8 | Page Z.
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawlconditions/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Index
5 |
6 |
7 |
8 |
Index
9 |
This is the index. All other pages should not be crawled due to
10 | crawl conditions.
11 |
12 | Regex
13 | Illegal P
14 |
15 | This is the container. Text and html will be
16 | loaded here.
17 |
18 |
19 |
20 |
29 |
30 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_10.html:
--------------------------------------------------------------------------------
1 |
8 | S10
9 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_11.html:
--------------------------------------------------------------------------------
1 | Final state S10
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_2.html:
--------------------------------------------------------------------------------
1 | Final state S2
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_3.html:
--------------------------------------------------------------------------------
1 |
11 | S3
12 | S6
13 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_4.html:
--------------------------------------------------------------------------------
1 |
8 | S4
9 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_5.html:
--------------------------------------------------------------------------------
1 |
8 | S5
9 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_6.html:
--------------------------------------------------------------------------------
1 | Final state S5
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_7.html:
--------------------------------------------------------------------------------
1 |
11 | S7
12 | S9
13 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_8.html:
--------------------------------------------------------------------------------
1 |
8 | S8
9 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawler/payload_9.html:
--------------------------------------------------------------------------------
1 | Final state S8
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawlscope/in_scope.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | In Scope
5 |
6 |
7 | In Scope
8 | This page can be accessed and crawled.
9 | inner page
10 |
11 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawlscope/in_scope_inner.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | In Scope Inner Page
5 |
6 |
7 | In Scope Inner Page
8 | This page should be accessed.
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawlscope/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Index Scope Test
5 |
6 |
7 | Index Scope Test
8 | Pages that are in and out of crawl scope.
9 | out of scope
10 | in scope
11 |
12 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawlscope/out_of_scope.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Out of Scope
5 |
6 |
7 | Out of Scope
8 | This page can be accessed but not crawled.
9 | inner page
10 |
11 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/crawlscope/out_of_scope_inner.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Out of Scope Inner Page
5 |
6 |
7 | Out of Scope Inner Page
8 | This page should not be accessed.
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/download/download.blob:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zaproxy/crawljax/4ac51f5f16bfab25edcc40abeb0b0233aa5798bc/core/src/test/resources/site/download/download.blob
--------------------------------------------------------------------------------
/core/src/test/resources/site/download/download.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Simple page
4 |
5 |
6 | Simple download page
7 | Download the jquery file
8 | This link should be clicked in stead of being stuck on the previous download link.
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/formhandler/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Index Form Handler Test
5 |
6 |
7 | Index Form Handler Test
8 |
9 |
10 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/hidden-elements-site/a.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Index
4 |
5 |
6 | This is A
7 |
8 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/hidden-elements-site/b.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | B
4 |
5 |
6 | This is B
7 | Go to C
8 |
9 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/hidden-elements-site/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Index
4 |
5 |
6 |
7 |
Index
8 |
This is the index and it shows two more sites:
9 |
10 | A is clickable as a anchor with an href leading to a.html
11 | B is clickable as a anchor but without an href . It has a Javascript function attached that directs you to B.
12 |
13 |
14 |
Hover me to see the links
15 |
21 |
22 |
23 |
24 |
38 |
39 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/home.html:
--------------------------------------------------------------------------------
1 | HOMEPAGE
2 | This state changes every time, but should be added only once because of the oracle comparators
3 |
4 | Random style
5 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/iframe/iframe.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Iframe
4 |
5 |
6 |
7 |
13 |
14 |
15 |
16 |
17 | This content is from the iframe.
18 |
19 |
20 | load-c2
21 | load-c3
22 |
23 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/iframe/iframe2.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Iframe 2
4 |
5 |
6 |
7 |
16 |
17 |
18 |
19 |
20 | This content is from the iframe.
21 |
22 | load-c8
23 | load-c9
24 |
25 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/iframe/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Window with iframe
4 |
5 |
7 |
8 |
15 |
20 |
21 |
30 |
32 |
34 |
35 | This is where the content will be
36 |
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/iframe/page0-0-0.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Nested IFrame
4 |
5 |
6 |
28 |
29 |
30 |
31 | load-c11
32 |
34 |
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/iframe/page0-0.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | First iframe
4 |
5 |
21 |
22 |
23 |
24 |
26 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/iframe/page0.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Main top page
4 |
5 |
21 |
22 |
23 |
24 |
26 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/iframe/subiframe.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Sub Iframe
4 |
5 |
6 |
7 |
16 |
17 |
18 |
19 |
20 | This content is from the iframe.
21 |
22 | load-c10
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Crawljax testSite
6 |
7 |
8 |
9 |
10 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/infinite.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
18 |
19 |
20 |
21 | Loads an infinite number of states
22 |
25 |
26 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/navigate_other_urls.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Simple page
5 |
6 |
7 | Simple page
8 |
9 | Open
10 | another state
11 |
12 | Show button to leave the site
13 |
14 |
15 |
17 |
18 | Open
19 | another state 22
20 |
21 |
22 |
23 |
24 |
46 |
47 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/popup/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Crawljax testSite
6 |
7 |
8 |
44 |
45 |
46 |
47 |
48 |
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/simple.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Simple page
4 |
5 |
6 | Simple page
7 | Nothing fancy here. Not even Javascript or CSS.
8 |
9 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/simplelink/simplelink.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Simple page
4 |
5 |
6 | Simple link page
7 | This link should be clicked.
8 |
9 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/testCrawlElementCondition.html:
--------------------------------------------------------------------------------
1 |
2 | DONT_CLICK_BUTTONS_ON_THIS_PAGE
3 | DONT_CLICK_ME_BECAUSE_OF_CONDITION
4 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/testCrawlElements.html:
--------------------------------------------------------------------------------
1 | Test for crawltags
2 |
3 |
4 | CLICK_ME
5 | DONT_CLICK_ME_BECAUSE_I_AM_A_NORMALE_DIV
6 | CLICK_ME
7 |
8 |
9 | CLICK_ME
10 | DONT_CLICK_ME
11 | DONT_CLICK_ME
12 |
13 |
14 | CLICK_ME
15 | DONT_CLICK_ME
16 |
17 |
18 |
19 | CLICK_ME_UNDER_HERE
20 |
21 |
22 |
25 |
26 |
27 | REGEX_CONDITION_TRUE
28 | CLICK_ME_BECAUSE_OF_CONDITION
29 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/testCrawlconditions.html:
--------------------------------------------------------------------------------
1 | DONT_CRAWL_ME
2 | DONT_CLICK_ME_BECAUSE_OF_CRAWLCONDITION
--------------------------------------------------------------------------------
/core/src/test/resources/site/testInvariants.html:
--------------------------------------------------------------------------------
1 | TEST_INVARIANTS
2 |
3 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/testOracleComparators.html:
--------------------------------------------------------------------------------
1 | this page links to the home page. The home page is alway different but should be added to the state machine once
2 | because the oracle comparators should consider the home page equivalent every time
3 |
4 | Home
--------------------------------------------------------------------------------
/core/src/test/resources/site/testWaitCondition.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | TEST_WAITCONDITION
8 |
9 |
10 | loading...
11 |
12 |
18 |
19 |
--------------------------------------------------------------------------------
/core/src/test/resources/site/underxpath.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
9 |
10 |
11 |
12 |
17 |
20 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/examples/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 | org.zaproxy.crawljax
5 | crawljax-parent-pom
6 | 3.8-SNAPSHOT
7 |
8 | crawljax-examples
9 | Crawljax Examples
10 | Crawljax usage example
11 |
12 |
13 | ${project.groupId}
14 | crawljax-core
15 | ${project.version}
16 |
17 |
18 | org.zaproxy.crawljax.plugins
19 | crawloverview-plugin
20 | ${project.version}
21 |
22 |
23 |
24 |
25 |
26 | maven-deploy-plugin
27 |
28 | true
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/examples/src/main/java/com/crawljax/examples/CrawlScopeExample.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.examples;
2 |
3 | import com.crawljax.core.CrawljaxRunner;
4 | import com.crawljax.core.configuration.CrawljaxConfiguration;
5 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
6 |
7 | /**
8 | * Example of running Crawljax with a custom crawl scope.
9 | */
10 | public final class CrawlScopeExample {
11 |
12 | private static final String URL = "http://example.com/";
13 |
14 | /**
15 | * Run this method to start the crawl.
16 | */
17 | public static void main(String[] args) {
18 | CrawljaxConfigurationBuilder builder = CrawljaxConfiguration.builderFor(URL);
19 |
20 | // Don't allow to crawl subdomains (default scope crawls subdomains).
21 | builder.setCrawlScope(url -> url.startsWith(URL));
22 |
23 | CrawljaxRunner crawljax = new CrawljaxRunner(builder.build());
24 | crawljax.call();
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/examples/src/main/java/com/crawljax/examples/PluginExample.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.examples;
2 |
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 |
6 | import com.crawljax.core.CrawlerContext;
7 | import com.crawljax.core.CrawljaxRunner;
8 | import com.crawljax.core.configuration.CrawljaxConfiguration;
9 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
10 | import com.crawljax.core.plugin.OnNewStatePlugin;
11 | import com.crawljax.core.state.StateVertex;
12 |
13 | /**
14 | * This example shows how to add your own plugin. The plugin just prints the DOM when a new state is
15 | * detected.
16 | */
17 | public class PluginExample {
18 |
19 | private static final Logger LOG = LoggerFactory.getLogger(PluginExample.class);
20 |
21 | public static void main(String[] args) {
22 |
23 | CrawljaxConfigurationBuilder builder =
24 | CrawljaxConfiguration.builderFor("http://demo.crawljax.com/");
25 | builder.addPlugin(new OnNewStatePlugin() {
26 |
27 | @Override
28 | public void onNewState(CrawlerContext context, StateVertex newState) {
29 | // This will print the DOM when a new state is detected. You should see it in your
30 | // console.
31 | LOG.info("Found a new dom! Here it is:\n{}", context.getBrowser().getStrippedDom());
32 | }
33 |
34 | @Override
35 | public String toString() {
36 | return "Our example plugin";
37 | }
38 | });
39 | CrawljaxRunner crawljax = new CrawljaxRunner(builder.build());
40 | crawljax.call();
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/examples/src/main/java/com/crawljax/examples/SimplestExample.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.examples;
2 |
3 | import com.crawljax.core.CrawljaxRunner;
4 | import com.crawljax.core.configuration.CrawljaxConfiguration;
5 |
6 | /**
7 | * Crawls our demo site with the default configuration. The crawl will log what it's doing but will
8 | * not produce any output.
9 | */
10 | public class SimplestExample {
11 |
12 | /**
13 | * Run this method to start the crawl.
14 | */
15 | public static void main(String[] args) {
16 | CrawljaxRunner crawljax =
17 | new CrawljaxRunner(CrawljaxConfiguration.builderFor("http://demo.crawljax.com/")
18 | .build());
19 | crawljax.call();
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/examples/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | [%thread] %-5level - %msg%n
8 |
9 |
10 |
11 |
12 | crawljax.log
13 | false
14 |
15 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{46} \(%L\) - %msg%n
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/plugins/README.md:
--------------------------------------------------------------------------------
1 | Crawljax Plugins Parent POM
2 | ===========================
3 |
4 | The parent Maven POM for Crawljax plugins.
5 |
6 | Parent POM
7 | ----------
8 | Start out by adding the parent configuration to your pom:
9 |
10 |     <parent>
11 |         <groupId>org.zaproxy.crawljax.plugins</groupId>
12 |         <artifactId>plugin</artifactId>
13 |         <version>2.2</version>
14 |     </parent>
15 |
16 |
17 | Properties
18 | ----------
19 |
20 | The pom includes properties that allow various build configurations to be customized.
21 | For example, to override the default version of crawljax, just set a property:
22 |
23 |
24 | VERSION
25 |
26 |
27 |
28 | Building Plugins
29 | ----------------
30 |
31 | For more information on how to write a Crawljax plugin see [this page](https://github.com/crawljax/crawljax/wiki/Writing-a-plugin).
32 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | git.properties
3 | .project
4 | .classpath
5 | .settings
6 | *.log
7 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | before_install:
3 | - "export DISPLAY=:99.0"
4 | - "sh -e /etc/init.d/xvfb start"
5 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/README.md:
--------------------------------------------------------------------------------
1 | Crawloverview plugin [Build status](https://travis-ci.org/crawljax/crawloverview-plugin)
2 | ====================
3 |
4 | Generates an HTML report with a snapshot overview of what is crawled by Crawljax.
5 |
6 | Maven
7 | -----
8 |
9 |     <dependency>
10 |         <groupId>org.zaproxy.crawljax.plugins</groupId>
11 |         <artifactId>crawloverview</artifactId>
12 |         <version>1.2</version>
13 |     </dependency>
14 |
15 | Using the plugin
16 | ----------------
17 | public class CrawlOverviewExample {
18 |
19 | private static final String URL = "http://google.com";
20 |
21 | public static void main(String[] args) {
22 | CrawljaxConfiguration config = new CrawljaxConfiguration();
23 | CrawlSpecification crawler = new CrawlSpecification(URL);
24 | crawler.setMaximumStates(5);
25 | crawler.clickDefaultElements();
26 | config.setCrawlSpecification(crawler);
27 | config.addPlugin(new CrawlOverview());
28 | try {
29 | CrawljaxController crawljax = new CrawljaxController(config);
30 | crawljax.run();
31 | } catch (Exception e) {
32 | e.printStackTrace();
33 | }
34 | }
35 | }
36 |
37 | The result will be generated in a folder called "crawloverview".
38 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/java/com/crawljax/plugins/crawloverview/CrawlOverviewException.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.plugins.crawloverview;
2 |
3 | import com.crawljax.core.CrawljaxException;
4 |
5 | /**
6 | * Gets thrown when something unexpected goes wrong inside the {@link CrawlOverview} plugin.
7 | */
8 | @SuppressWarnings("serial")
9 | public class CrawlOverviewException extends CrawljaxException {
10 |
11 | public CrawlOverviewException(String message, Throwable cause) {
12 | super(message, cause);
13 | }
14 |
15 | public CrawlOverviewException(String message) {
16 | super(message);
17 | }
18 |
19 | }
20 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/java/com/crawljax/plugins/crawloverview/ImageWriter.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.plugins.crawloverview;
2 |
3 | import java.awt.Color;
4 | import java.awt.Graphics2D;
5 | import java.awt.Image;
6 | import java.awt.image.BufferedImage;
7 | import java.io.ByteArrayInputStream;
8 | import java.io.File;
9 | import java.io.IOException;
10 |
11 | import javax.imageio.ImageIO;
12 |
13 | import com.crawljax.core.CrawljaxException;
14 |
15 | public class ImageWriter {
16 |
17 | private static final int THUMBNAIL_WIDTH = 200;
18 | private static final int THUMBNAIL_HEIGHT = 200;
19 |
20 | static void writeScreenShotAndThumbnail(byte[] png, File fullFile, File thumnail) {
21 | try {
22 | Image image = ImageIO.read(new ByteArrayInputStream(png));
23 | writeFullSizeJpeg(fullFile, image);
24 | writeThumbNail(thumnail, image);
25 | } catch (IOException e) {
26 | throw new CrawljaxException("Could not write screenshots to disk", e);
27 | }
28 |
29 | }
30 |
31 | private static void writeFullSizeJpeg(File target, Image image) throws IOException {
32 | int height = image.getHeight(null);
33 | int width = image.getWidth(null);
34 | BufferedImage bufImg = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
35 | Graphics2D graphics = bufImg.createGraphics();
36 | graphics.drawImage(image, 0, 0, Color.WHITE, null);
37 | graphics.dispose();
38 | ImageIO.write(bufImg, "jpg", target);
39 | }
40 |
41 | private static void writeThumbNail(File target, Image screenshot) throws IOException {
42 | BufferedImage resizedImage =
43 | new BufferedImage(THUMBNAIL_WIDTH, THUMBNAIL_HEIGHT, BufferedImage.TYPE_INT_RGB);
44 | Graphics2D g = resizedImage.createGraphics();
45 | g.drawImage(screenshot, 0, 0, THUMBNAIL_WIDTH, THUMBNAIL_HEIGHT, Color.WHITE, null);
46 | g.dispose();
47 | ImageIO.write(resizedImage, "jpg", target);
48 | }
49 |
50 | private ImageWriter() {
51 | }
52 | }
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/java/com/crawljax/plugins/crawloverview/model/StateCounter.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.plugins.crawloverview.model;
2 |
3 | import java.util.Objects;
4 |
5 | import com.fasterxml.jackson.annotation.JsonCreator;
6 | import com.fasterxml.jackson.annotation.JsonProperty;
7 | import com.google.common.base.MoreObjects;
8 |
9 | public class StateCounter {
10 |
11 | private final String id;
12 | private final int count;
13 |
14 | @JsonCreator
15 | public StateCounter(@JsonProperty("id") String id, @JsonProperty("count") int count) {
16 | this.id = id;
17 | this.count = count;
18 | }
19 |
20 | public int getCount() {
21 | return count;
22 | }
23 |
24 | public String getId() {
25 | return id;
26 | }
27 |
28 | @Override
29 | public String toString() {
30 | return MoreObjects.toStringHelper(this)
31 | .add("id", id)
32 | .add("count", count)
33 | .toString();
34 | }
35 |
36 | @Override
37 | public int hashCode() {
38 | return Objects.hash(id, count);
39 | }
40 |
41 | @Override
42 | public boolean equals(Object object) {
43 | if (object instanceof StateCounter) {
44 | StateCounter that = (StateCounter) object;
45 | return Objects.equals(this.id, that.id)
46 | && Objects.equals(this.count, that.count);
47 | }
48 | return false;
49 | }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/resources/config.html:
--------------------------------------------------------------------------------
1 | #set ( $page = "config") #set( $baseUrl = ".")
2 |
3 |
6 |
7 |
8 | #foreach ($mapEntry in $config.entrySet())
9 |
10 | ${mapEntry.key}
11 | #if( ! $mapEntry.value )
12 | undefined
13 | #else
14 | ${mapEntry.value}
15 | #end
16 |
17 | #end
18 |
19 |
20 |
21 | #parse ( "version.html" )
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/resources/header.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Crawl overview
6 |
7 |
8 |
9 |
10 |
11 |
14 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/resources/nav.html:
--------------------------------------------------------------------------------
1 |
2 |
32 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/resources/plugin-descriptor.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | Crawl Overview Plugin
5 | Generates an HTML report with a snapshot overview of what is crawled.
6 |
7 | ${project.version}
8 |
9 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/resources/skeleton/img/glyphicons-halflings-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zaproxy/crawljax/4ac51f5f16bfab25edcc40abeb0b0233aa5798bc/plugins/crawloverview-plugin/src/main/resources/skeleton/img/glyphicons-halflings-white.png
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/resources/skeleton/img/glyphicons-halflings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zaproxy/crawljax/4ac51f5f16bfab25edcc40abeb0b0233aa5798bc/plugins/crawloverview-plugin/src/main/resources/skeleton/img/glyphicons-halflings.png
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/resources/urls.html:
--------------------------------------------------------------------------------
1 | #set ( $page = "urls") #set( $baseUrl = ".")
2 |
5 |
6 |
7 | #foreach( $url in $urls.keySet() )
8 |
9 | ${url}
10 | #foreach( $state in $urls.get($url) ) ${state} , #end
12 |
13 |
14 | #end
15 |
16 |
17 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/main/resources/version.html:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 | Crawljax version
9 | ${project.version}
10 |
11 |
12 | Git revision
13 | ${git.commit.id}
14 |
15 |
16 | Git describe
17 | ${git.commit.id.describe}
18 |
19 |
20 | Build time
21 | ${git.build.time}
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/java/com/crawljax/matchers/IsValidJson.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.matchers;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 |
6 | import org.hamcrest.Description;
7 | import org.hamcrest.Factory;
8 | import org.hamcrest.Matcher;
9 | import org.hamcrest.TypeSafeMatcher;
10 |
11 | import com.fasterxml.jackson.core.JsonParser;
12 | import com.fasterxml.jackson.databind.ObjectMapper;
13 |
14 | public class IsValidJson {
15 |
16 | /**
17 | * @return a {@link Matcher} that checks if the given {@link File} contains a valid JSON object.
18 | */
19 | @Factory
20 | public static Matcher isValidJson() {
21 | return new TypeSafeMatcher() {
22 |
23 | @Override
24 | public void describeTo(Description description) {
25 | description.appendText("Valid JSON String");
26 | }
27 |
28 | @Override
29 | protected boolean matchesSafely(File item) {
30 | boolean valid = false;
31 | try {
32 | JsonParser parser = new ObjectMapper().getFactory().createParser(item);
33 | while (parser.nextToken() != null) {
34 | }
35 | valid = true;
36 | } catch (IOException e) {
37 | throw new AssertionError(e);
38 | }
39 |
40 | return valid;
41 | }
42 | };
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/java/com/crawljax/plugins/crawloverview/BeanToReadableMapTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.plugins.crawloverview;
2 |
3 | import static org.hamcrest.collection.IsMapContaining.hasEntry;
4 | import static org.hamcrest.core.Is.is;
5 | import static org.junit.Assert.assertThat;
6 |
7 | import java.util.Map;
8 |
9 | import org.junit.Test;
10 |
11 | public class BeanToReadableMapTest {
12 |
13 | @Test
14 | public void test() {
15 | Map map = BeanToReadableMap.toMap(new TestBean());
16 | assertThat(map.size(), is(4));
17 | assertThat(map, hasEntry("Some String", "A"));
18 | assertThat(map, hasEntry("Some Int", "123"));
19 | assertThat(map, hasEntry("String List", ""));
20 | assertThat(map, hasEntry("Object List", ""));
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/java/com/crawljax/plugins/crawloverview/CandidateElementMatcher.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.plugins.crawloverview;
2 |
3 | import org.hamcrest.CustomMatcher;
4 | import org.hamcrest.Factory;
5 | import org.openqa.selenium.Dimension;
6 | import org.openqa.selenium.Point;
7 |
8 | import com.crawljax.plugins.crawloverview.model.CandidateElementPosition;
9 |
10 | class CandidateElementMatcher extends CustomMatcher {
11 |
12 | private CandidateElementPosition actual;
13 |
14 | public CandidateElementMatcher(CandidateElementPosition actual) {
15 | super("A " + CandidateElementPosition.class.getName() + " with coordinates");
16 | this.actual = actual;
17 | }
18 |
19 | @Override
20 | public boolean matches(Object item) {
21 | if (item instanceof CandidateElementPosition) {
22 | CandidateElementPosition element = (CandidateElementPosition) item;
23 | return element.getLeft() == actual.getLeft() && element.getTop() == actual.getTop()
24 | && element.getWidth() == actual.getWidth()
25 | && element.getHeight() == actual.getHeight();
26 | } else {
27 | return false;
28 | }
29 | }
30 |
31 | @Factory
32 | public static CandidateElementMatcher element(Point point, Dimension size) {
33 | return new CandidateElementMatcher(new CandidateElementPosition(null, point, size));
34 | }
35 |
36 | }
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/java/com/crawljax/plugins/crawloverview/ImageWriterTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.plugins.crawloverview;
2 |
3 | import static org.hamcrest.core.Is.is;
4 | import static org.junit.Assert.assertThat;
5 |
6 | import java.io.File;
7 | import java.net.URI;
8 |
9 | import org.junit.Rule;
10 | import org.junit.Test;
11 | import org.junit.rules.TemporaryFolder;
12 |
13 | import com.google.common.hash.Hashing;
14 | import com.google.common.io.Files;
15 |
16 | public class ImageWriterTest {
17 |
18 | private static final String THUMB_HASH = "4040cc9bb7aaea0eebdc2879055eb68b";
19 | private static final String FULL_HASH = "84254c0309e738b13dc3968414dcae5a";
20 |
21 | @Rule
22 | public final TemporaryFolder folder = new TemporaryFolder();
23 |
24 | @Test
25 | public void makingAThumbnailDoesntThrowException() throws Exception {
26 | URI file = OutputBuilderTest.class.getResource("/screenshot.png").toURI();
27 | File screenShot = new File(file);
28 | File fullScreenShot = folder.newFile();
29 | File thumbnail = folder.newFile();
30 | ImageWriter.writeScreenShotAndThumbnail(Files.toByteArray(screenShot), fullScreenShot,
31 | thumbnail);
32 |
33 | assertThat("Thumbnail exists", thumbnail.exists(), is(true));
34 | String hash = Files.asByteSource(thumbnail).hash(Hashing.murmur3_128()).toString();
35 | assertThat("Thumb hash doesn't match", hash, is(THUMB_HASH));
36 |
37 | assertThat("Screenshot exists", fullScreenShot.exists(), is(true));
38 | hash = Files.asByteSource(fullScreenShot).hash(Hashing.murmur3_128()).toString();
39 | assertThat("Screenshot hash doesn't match", hash, is(FULL_HASH));
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/java/com/crawljax/plugins/crawloverview/OutputBuilderTest.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.plugins.crawloverview;
2 |
3 | import static org.hamcrest.core.Is.is;
4 | import static org.hamcrest.text.IsEmptyString.isEmptyString;
5 | import static org.junit.Assert.assertThat;
6 |
7 | import java.io.File;
8 | import java.io.FileWriter;
9 | import java.io.IOException;
10 |
11 | import org.junit.Before;
12 | import org.junit.Rule;
13 | import org.junit.Test;
14 | import org.junit.rules.TemporaryFolder;
15 |
16 | public class OutputBuilderTest {
17 |
18 | @Rule
19 | public final TemporaryFolder folder = new TemporaryFolder();
20 | private OutputBuilder builder;
21 | private File outputFolder;
22 |
23 | @Before
24 | public void setup() {
25 | outputFolder = folder.getRoot();
26 | builder = new OutputBuilder(outputFolder);
27 | }
28 |
29 | @Test
30 | public void testNewScreenShotFileIsWritable() throws IOException {
31 | FileWriter fwriter = new FileWriter(builder.newScreenShotFile("test"));
32 | fwriter.write("blabla");
33 | fwriter.close();
34 | }
35 |
36 | @Test
37 | public void whenDomPersistedTheLoadFunctionReturnsTheSameDom() {
38 | String dom = "Some DOM string";
39 | builder.persistDom("test-state", dom);
40 | assertThat(builder.getDom("test-state"), is(dom));
41 | }
42 |
43 | @Test
44 | public void whenNullPersistedTheDomIsPersistedAsEmpty() {
45 | builder.persistDom("test-state", null);
46 | assertThat(builder.getDom("test-state"), isEmptyString());
47 | }
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/java/com/crawljax/plugins/crawloverview/TestBean.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.plugins.crawloverview;
2 |
3 | import java.util.concurrent.atomic.AtomicInteger;
4 |
5 | import com.google.common.collect.ImmutableList;
6 |
7 | public class TestBean {
8 |
9 | private final String someString = "A";
10 | private final int someInt = 123;
11 | private final ImmutableList stringList = ImmutableList.of("A", "B");
12 | private final ImmutableList objectList = ImmutableList
13 | .of(new AtomicInteger(42));
14 |
15 | public String getSomeString() {
16 | return someString;
17 | }
18 |
19 | public int getSomeInt() {
20 | return someInt;
21 | }
22 |
23 | public ImmutableList getStringList() {
24 | return stringList;
25 | }
26 |
27 | public ImmutableList getObjectList() {
28 | return objectList;
29 | }
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/resources/hover-test-site/a.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Index
5 |
6 |
7 |
8 |
This is A
9 |
The link is in a div with a margin of 50px;
10 | Back to index
11 |
12 |
13 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/resources/hover-test-site/b.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | B
5 |
6 |
7 |
8 |
This is B
9 |
It's a div with an absolute position 50px from the edges. The anchor also has a margin of 10px;
10 |
Go to C
11 |
12 |
13 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/resources/hover-test-site/c.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | C
5 |
10 |
11 |
12 |
13 |
This is C
14 |
the div is aligned in the center, relative to the viewport
15 |
Go to B Go to Index
16 |
17 |
18 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/resources/hover-test-site/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Index
5 |
6 |
7 |
8 |
Index
9 |
This is the index. The site contains in total 4 sites without
10 | any Javascript but with some styling. They are linked using href
11 | links.
12 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/resources/logback-test.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/plugins/crawloverview-plugin/src/test/resources/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zaproxy/crawljax/4ac51f5f16bfab25edcc40abeb0b0233aa5798bc/plugins/crawloverview-plugin/src/test/resources/screenshot.png
--------------------------------------------------------------------------------
/plugins/test-plugin/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 4.0.0
4 |
5 | org.zaproxy.crawljax.plugins
6 | crawljax-plugins-parent
7 | 3.8-SNAPSHOT
8 |
9 | test-plugin
10 | Test Plugin
11 | A plugin for testing
12 | http://crawljax.com
13 |
14 |
15 | org.zaproxy.crawljax
16 | crawljax-core
17 | ${project.version}
18 |
19 |
20 |
21 | ${project.artifactId}
22 |
23 |
24 | src/main/resources
25 |
26 | plugin-descriptor.xml
27 |
28 | true
29 |
30 |
31 | src/main/resources
32 |
33 | plugin-descriptor.xml
34 |
35 | false
36 |
37 |
38 |
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/plugins/test-plugin/src/main/java/com/crawljax/plugins/testplugin/TestPlugin.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.plugins.testplugin;
2 |
3 | import java.io.File;
4 | import java.io.FileWriter;
5 | import java.util.Map;
6 |
7 | import com.crawljax.core.CrawlerContext;
8 | import com.crawljax.core.configuration.CrawljaxConfiguration;
9 | import com.crawljax.core.plugin.HostInterface;
10 | import com.crawljax.core.plugin.OnNewStatePlugin;
11 | import com.crawljax.core.plugin.PreCrawlingPlugin;
12 | import com.crawljax.core.state.StateVertex;
13 |
14 | public class TestPlugin implements OnNewStatePlugin,
15 | PreCrawlingPlugin {
16 |
17 | private HostInterface hostInterface;
18 |
19 | public TestPlugin(HostInterface hostInterface) {
20 | this.hostInterface = hostInterface;
21 | }
22 |
23 | @Override
24 | public void onNewState(CrawlerContext context, StateVertex newState) {
25 | try {
26 | String dom = context.getBrowser().getStrippedDom();
27 | File file = new File(hostInterface.getOutputDirectory(), context.getCurrentState().getName() + ".html");
28 |
29 | FileWriter fw = new FileWriter(file, false);
30 | fw.write(dom);
31 | fw.close();
32 | } catch (Exception e) {
33 | e.printStackTrace();
34 | }
35 | }
36 |
37 | @Override
38 | public void preCrawling(CrawljaxConfiguration config) throws RuntimeException {
39 | try {
40 | File file = new File(hostInterface.getOutputDirectory(), "parameters.txt");
41 | FileWriter fw = new FileWriter(file, false);
42 | for(Map.Entry parameter : hostInterface.getParameters().entrySet()) {
43 | fw.write(parameter.getKey() + ": " + parameter.getValue() + System.getProperty("line.separator"));
44 | }
45 | fw.close();
46 | } catch (Exception e) {
47 | e.printStackTrace();
48 | }
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/plugins/test-plugin/src/main/resources/plugin-descriptor.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 | Test Plugin
5 | A plugin to use for testing
6 |
7 | ${project.version}
8 |
9 |
10 |
11 | test_textbox
12 | Test Textbox
13 | textbox
14 |
15 |
16 | test_checkbox
17 | Test Checkbox
18 | checkbox
19 |
20 |
21 | test_select
22 | Test Select
23 | select
24 |
25 |
26 | Option 1
27 | 1
28 |
29 |
30 | Option 2
31 | 2
32 |
33 |
34 | Option 3
35 | 3
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/test-utils/README.md:
--------------------------------------------------------------------------------
1 | # Crawljax plugin test utils [](https://travis-ci.org/crawljax/crawljax-test-utils)
2 |
3 | This project offers Crawljax plugin developers a convenient way to test their plugins by offering a crawl of several default sites with known/expected output.
4 |
5 | The jar contains three sites to test your plugin against.
6 |
7 | 1. The simple site. This site has plain `<a>` links.
8 | 2. The simple JavaScript site. This site uses JavaScript to switch between states.
9 | 3. The simple Input site. This site requires the crawler to enter the right value in an input box before it changes state.
10 |
11 | This project is open for extension to create other reusable Crawljax tests.
12 |
13 | ## Matchers
14 | The project also contains some [matchers](https://github.com/crawljax/crawljax-test-utils/tree/master/src/main/java/com/crawljax/matchers) you can use in your own tests. Add matchers to this project if you think they are reusable.
15 |
--------------------------------------------------------------------------------
/test-utils/src/main/java/com/crawljax/crawltests/SimpleInputSiteCrawl.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.crawltests;
2 |
3 | import org.eclipse.jetty.util.resource.Resource;
4 |
5 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
6 | import com.crawljax.core.configuration.InputField;
7 | import com.crawljax.core.configuration.InputSpecification;
8 | import com.crawljax.test.BaseCrawler;
9 |
10 | /**
11 | * Wraps a Crawljax instance the crawls the simplesite.
12 | */
13 | public class SimpleInputSiteCrawl extends BaseCrawler {
14 |
15 | public static final int NUMBER_OF_STATES = 2;
16 | public static final int NUMBER_OF_EDGES = 1;
17 |
18 | public SimpleInputSiteCrawl() {
19 | super(Resource.newClassPathResource("sites"), "simple-input-site");
20 | }
21 |
22 | @Override
23 | protected CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
24 | CrawljaxConfigurationBuilder builder = super.newCrawlConfigurationBuilder();
25 | builder.crawlRules().setInputSpec(getInputSpecification());
26 | return builder;
27 | }
28 |
29 | /**
30 | * @return The {@link InputSpecification} for the input box in this crawl session. You can
31 | * override this methods to add more values. By default, it loads with two bad strings,
32 | * and one good string.
33 | */
34 | public InputSpecification getInputSpecification() {
35 | InputSpecification inputSpecification = new InputSpecification();
36 | InputField field = inputSpecification.field("input");
37 | field.setValue("Good input");
38 | field.setValue("This doesnt work");
39 | field.setValue("Neither does this");
40 |
41 | return inputSpecification;
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/test-utils/src/main/java/com/crawljax/crawltests/SimpleJsSiteCrawl.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.crawltests;
2 |
3 | import org.eclipse.jetty.util.resource.Resource;
4 |
5 | import com.crawljax.test.BaseCrawler;
6 |
7 | /**
8 | * Wraps a Crawljax instance the crawls the simplesite.
9 | */
10 | public class SimpleJsSiteCrawl extends BaseCrawler {
11 |
12 | public static final int NUMBER_OF_STATES = 11;
13 | public static final int NUMBER_OF_EDGES = 10;
14 |
15 | public SimpleJsSiteCrawl() {
16 | super(Resource.newClassPathResource("sites"), "simple-js-site");
17 | }
18 |
19 | }
20 |
--------------------------------------------------------------------------------
/test-utils/src/main/java/com/crawljax/crawltests/SimpleSiteCrawl.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.crawltests;
2 |
3 | import org.eclipse.jetty.util.resource.Resource;
4 |
5 | import com.crawljax.test.BaseCrawler;
6 |
7 | /**
8 | * Wraps a Crawljax instance the crawls the simplesite.
9 | */
10 | public class SimpleSiteCrawl extends BaseCrawler {
11 |
12 | public static final int NUMBER_OF_STATES = 4;
13 | public static final int NUMBER_OF_EDGES = 5;
14 |
15 | public SimpleSiteCrawl() {
16 | super(Resource.newClassPathResource("sites"), "simple-site");
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/test-utils/src/main/java/com/crawljax/crawltests/SimpleXpathCrawl.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.crawltests;
2 |
3 | import org.eclipse.jetty.util.resource.Resource;
4 |
5 | import com.crawljax.core.configuration.CrawljaxConfiguration.CrawljaxConfigurationBuilder;
6 | import com.crawljax.test.BaseCrawler;
7 |
8 | /**
9 | * Wraps a Crawljax instance the crawls the simplesite.
10 | */
11 | public class SimpleXpathCrawl extends BaseCrawler {
12 |
13 | public static final int NUMBER_OF_STATES = 3;
14 | public static final int NUMBER_OF_EDGES = 2;
15 |
16 | public SimpleXpathCrawl() {
17 | super(Resource.newClassPathResource("sites"), "simple-xpath-site");
18 | }
19 |
20 | @Override
21 | protected CrawljaxConfigurationBuilder newCrawlConfigurationBuilder() {
22 | CrawljaxConfigurationBuilder builder = super.newCrawlConfigurationBuilder();
23 | builder.crawlRules().click("a").underXPath("//A[@class='click']");
24 | builder.crawlRules().dontClickChildrenOf("div").withId("dontClick");
25 | return builder;
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/test-utils/src/main/java/com/crawljax/rules/TempDirInTargetFolder.java:
--------------------------------------------------------------------------------
1 | package com.crawljax.rules;
2 |
3 | import static com.google.common.base.Preconditions.checkArgument;
4 | import static org.apache.commons.io.FileUtils.deleteQuietly;
5 |
6 | import java.io.File;
7 | import java.text.SimpleDateFormat;
8 | import java.util.Date;
9 |
10 | import org.junit.rules.ExternalResource;
11 |
12 | public class TempDirInTargetFolder extends ExternalResource {
13 |
14 | private static final String DATE_FORMAT = "yyyy-MM-dd-HH.mm.ss";
15 | private final File target;
16 | private final String prefix;
17 | private final boolean override;
18 | private File tmpDir;
19 |
20 | public TempDirInTargetFolder(String prefix, boolean override) {
21 | this.prefix = prefix;
22 | this.override = override;
23 | target = new File("target/test-data");
24 | if (!target.exists()) {
25 | boolean created = target.mkdirs();
26 | checkArgument(created, "Could not create target/test-data dir");
27 | }
28 | }
29 |
30 | @Override
31 | protected void before() throws Throwable {
32 | if (override) {
33 | tmpDir = new File(target, prefix);
34 | if (tmpDir.exists()) {
35 | deleteQuietly(tmpDir);
36 | }
37 | } else {
38 | SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT);
39 | String suffix = format.format(new Date());
40 | tmpDir = new File(target, prefix + '-' + suffix);
41 | }
42 | boolean created = tmpDir.mkdirs();
43 | checkArgument(created, "Could not create tmpDir");
44 | }
45 |
46 | public File getTempDir() {
47 | return tmpDir;
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-input-site/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
Simple input test
9 |
If you fill in "Good input" the application will go to another
10 | state. All other input is disregarded.
11 |
12 |
13 | Hit me
14 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-input-site/otherState.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Other state
5 |
6 |
7 | Well done crawler! This is the other state.
8 |
9 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
21 |
22 |
23 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_10.html:
--------------------------------------------------------------------------------
1 |
8 | S10
9 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_11.html:
--------------------------------------------------------------------------------
1 | Final state S10
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_2.html:
--------------------------------------------------------------------------------
1 | Final state S2
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_3.html:
--------------------------------------------------------------------------------
1 |
11 | S3
12 | S6
13 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_4.html:
--------------------------------------------------------------------------------
1 |
8 | S4
9 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_5.html:
--------------------------------------------------------------------------------
1 |
8 | S5
9 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_6.html:
--------------------------------------------------------------------------------
1 | Final state S5
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_7.html:
--------------------------------------------------------------------------------
1 |
11 | S7
12 | S9
13 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_8.html:
--------------------------------------------------------------------------------
1 |
8 | S8
9 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-js-site/payload_9.html:
--------------------------------------------------------------------------------
1 | Final state S8
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-site/a.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Index
5 |
6 |
7 | This is A
8 |
9 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-site/b.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | B
5 |
6 |
7 | This is B
8 | Go to C
9 |
10 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-site/c.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | C
5 |
6 |
7 | This is C
8 | Go to B
9 | Go to Index
10 |
11 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-site/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Index
5 |
6 |
7 |
8 |
Index
9 |
This is the index. The site contains in total 4 sites without
10 | any Javascript. They are linked using href links.
11 |
The site looks like this:
12 |
13 | Index -> a
14 | Index -> b
15 | b -> c
16 | c -> b
17 | c -> Index
18 |
19 |
20 | Go to A
21 | Go to B
22 |
23 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-xpath-site/a.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Index
4 |
5 |
6 | This is A
7 |
8 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-xpath-site/b.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | B
4 |
5 |
6 | This is B
7 |
8 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-xpath-site/index-iframe.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | iFrame
4 |
5 |
6 | I'm in an iFrame!
7 |
10 |
11 |
--------------------------------------------------------------------------------
/test-utils/src/main/resources/sites/simple-xpath-site/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Index
4 |
5 |
6 |
7 |
Index
8 |
This is the index. The site contains 3 sub sites:
9 |
10 | A is visible but has class 'noclick' so the Crawler should
11 | skip that.
12 | B is visible and has class 'click'
13 |
14 |
15 |
21 |
22 |
23 |
--------------------------------------------------------------------------------