├── .github
└── FUNDING.yml
├── bin
├── restart.sh
├── start.sh
├── stop.sh
└── start_loader_docker.sh
├── gradle
└── wrapper
│ ├── gradle-wrapper.jar
│ └── gradle-wrapper.properties
├── .dockerignore
├── .gitignore
├── .gitmodules
├── .settings
├── org.eclipse.jdt.core.prefs
└── org.eclipse.buildship.core.prefs
├── src
└── main
│ ├── resources
│ └── log4j.properties
│ └── java
│ └── net
│ └── yacy
│ └── grid
│ └── loader
│ ├── retrieval
│ ├── HttpClient.java
│ ├── JavaHttpClient.java
│ ├── LoaderClientConnection.java
│ ├── HtmlUnitLoader.java
│ ├── ContentLoader.java
│ └── FTPClient.java
│ ├── api
│ ├── LoaderService.java
│ └── ProcessService.java
│ ├── JwatWarcWriter.java
│ ├── Loader.java
│ └── LoaderListener.java
├── .project
├── Dockerfile
├── .classpath
├── conf
└── config.properties
├── README.md
├── gradlew.bat
└── gradlew
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: orbiter
2 | patreon: 0rb1t3r
3 |
--------------------------------------------------------------------------------
/bin/restart.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | # Restart the YaCy Grid Loader: stop a running instance, then start a fresh one.
3 | # $0 is quoted so the script also works when the checkout path contains spaces.
4 | cd "$(dirname "$0")" || exit 1
5 | ./stop.sh
6 | sleep 1
7 | ./start.sh
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yacy/yacy_grid_loader/master/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | .gitignore
3 | data
4 | build
5 | bin
6 | docker
7 | Dockerfile
8 | LICENSE.md
9 | README.md
10 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | classes/
2 | target/
3 | data/
4 | /class/
5 | /.gradle/
6 | /build/
7 | .DS_Store
8 | .settings
9 | .idea/
10 |
11 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "submodules/yacy_grid_mcp"]
2 | path = submodules/yacy_grid_mcp
3 | url = https://github.com/yacy/yacy_grid_mcp.git
4 |
--------------------------------------------------------------------------------
/bin/start.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | # Start the loader jar in the background from the repository root.
3 | # $0 is quoted so that a checkout path containing spaces does not break the cd.
4 | cd "$(dirname "$0")/.." || exit 1
5 | nohup java -jar build/libs/yacy_grid_loader-0.0.1-SNAPSHOT-all.jar < /dev/null &
6 | sleep 1
7 | echo "YaCy Grid Loader started!"
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
3 | org.eclipse.jdt.core.compiler.compliance=1.8
4 | org.eclipse.jdt.core.compiler.source=1.8
5 |
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionBase=GRADLE_USER_HOME
2 | distributionPath=wrapper/dists
3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.3-bin.zip
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 |
--------------------------------------------------------------------------------
/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Set root logger level to DEBUG and its only appender to A1 (syntax: <level>, <appender>...).
2 | log4j.rootLogger=DEBUG, A1
3 |
4 | # A1 is set to be a ConsoleAppender.
5 | log4j.appender.A1=org.apache.log4j.ConsoleAppender
6 |
7 | # A1 uses PatternLayout.
8 | log4j.appender.A1.layout=org.apache.log4j.PatternLayout
9 | log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
10 |
11 | log4j.logger.org.eclipse.jetty = INFO
12 | log4j.logger.org.apache.http = INFO
13 |
--------------------------------------------------------------------------------
/bin/stop.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | # Stop the loader on port 8200: first via its kill file, then by killing whoever holds the pid file.
3 | cd "$(dirname "$0")/../data" || exit 1
4 | KILLFILE="loader-8200.kill"
5 | PIDFILE="loader-8200.pid"
6 |
7 | # first method to terminate the process
8 | if [ -f "$KILLFILE" ];
9 | then
10 |     rm "$KILLFILE"
11 |     echo "termination requested, waiting.."
12 |     # this can take 10 seconds..
13 |     sleep 10
14 | fi
15 |
16 | # second method to terminate the process
17 | if [ -f "$PIDFILE" ];
18 | then
19 |     fuser -k "$PIDFILE"
20 | fi
21 |
22 | # check if file does not exist any more which would be a sign that this has terminated
23 | if [ ! -f "$PIDFILE" ];
24 | then
25 |     echo "process terminated"
26 | fi
27 |
28 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.buildship.core.prefs:
--------------------------------------------------------------------------------
1 | arguments=
2 | auto.sync=false
3 | build.commands=org.eclipse.jdt.core.javabuilder
4 | build.scans.enabled=false
5 | connection.arguments=
6 | connection.gradle.distribution=GRADLE_DISTRIBUTION(VERSION(5.6.2))
7 | connection.java.home=null
8 | connection.jvm.arguments=
9 | connection.project.dir=
10 | derived.resources=.gradle,build
11 | eclipse.preferences.version=1
12 | gradle.user.home=
13 | java.home=
14 | jvm.arguments=
15 | natures=org.eclipse.jdt.core.javanature
16 | offline.mode=false
17 | override.workspace.settings=true
18 | project.path=\:
19 | show.console.view=true
20 | show.executions.view=true
21 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | yacy_grid_loader
4 | Project yacy_grid_loader created by Buildship.
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.buildship.core.gradleprojectbuilder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.buildship.core.gradleprojectnature
21 | org.eclipse.jdt.core.javanature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | ## yacy_grid_loader dockerfile
2 | ## examples:
3 | # docker build -t yacy_grid_loader .
4 | # docker run -d --rm -p 8200:8200 --name yacy_grid_loader yacy_grid_loader
5 | ## Check if the service is running:
6 | # curl http://localhost:8200/yacy/grid/mcp/info/status.json
7 |
8 | # build app
9 | FROM eclipse-temurin:8-jdk-focal AS appbuilder
10 | COPY ./ /app
11 | WORKDIR /app
12 | RUN ./gradlew clean shadowDistTar
13 |
14 | # build dist (key=value form of ENV; the whitespace-separated form is legacy syntax)
15 | FROM eclipse-temurin:8-jre-focal
16 | LABEL maintainer="Michael Peter Christen "
17 | ENV DEBIAN_FRONTEND=noninteractive
18 | ARG default_branch=master
19 | COPY ./conf /app/conf/
20 | COPY --from=appbuilder /app/build/libs/ ./app/build/libs/
21 | WORKDIR /app
22 | EXPOSE 8200
23 |
24 | # for some weird reason the jar file is sometimes not named correctly
25 | RUN if [ -e /app/build/libs/app-0.0.1-SNAPSHOT-all.jar ] ; then mv /app/build/libs/app-0.0.1-SNAPSHOT-all.jar /app/build/libs/yacy_grid_loader-0.0.1-SNAPSHOT-all.jar; fi
26 |
27 | CMD ["java", "-Xms320M", "-Xmx3G", "-jar", "/app/build/libs/yacy_grid_loader-0.0.1-SNAPSHOT-all.jar"]
28 |
--------------------------------------------------------------------------------
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/retrieval/HttpClient.java:
--------------------------------------------------------------------------------
1 | /**
2 | * HttpClient
3 | * Copyright 24.2.2018 by Michael Peter Christen, @0rb1t3r
4 | *
5 | * This library is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU Lesser General Public
7 | * License as published by the Free Software Foundation; either
8 | * version 2.1 of the License, or (at your option) any later version.
9 | *
10 | * This library is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | * Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program in the file lgpl21.txt
17 | * If not, see <http://www.gnu.org/licenses/>.
18 | */
19 |
20 | package net.yacy.grid.loader.retrieval;
21 |
22 | import java.util.List;
23 | import java.util.Map;
24 |
25 | /**
26 |  * Read-only view on the outcome of a single HTTP retrieval.
27 |  * JavaHttpClient is one implementation of this interface.
28 |  */
29 | public interface HttpClient {
30 |
31 |     public int getStatusCode();
32 |
33 |     public String getMime();
34 |
35 |     /**
36 |      * @return header fields as a map from field name to the list of its values
37 |      *         (presumably the response header fields; confirm against the implementations)
38 |      */
39 |     public Map<String, List<String>> getHeader();
40 |
41 |     public String getRequestHeader();
42 |
43 |     public String getResponseHeader();
44 |
45 |     public byte[] getContent();
46 | }
47 |
--------------------------------------------------------------------------------
/bin/start_loader_docker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Build the loader image if it is missing and run it linked to the other grid containers.
3 | cd "$(dirname "$0")" || exit 1
4 | bindhost="127.0.0.1"
5 | callhost="localhost"
6 | appname="YaCy Grid Loader"
7 | containername=yacy-grid-loader
8 | imagename=${containername//-/_}
9 | dockerfile="Dockerfile"
10 | production=false
11 | open=false
12 |
13 | usage() { echo "usage: $0 [-o | --open | -p | --production | --arm32 | --arm64 ]" 1>&2; exit 1; }
14 |
15 | while [[ $# -gt 0 ]]; do
16 |     case "$1" in
17 |         -p | --production ) production=true; shift 1;;
18 |         -o | --open ) open=true; shift 1;;
19 |         --arm32 ) imagename=${imagename}:arm32; dockerfile=${dockerfile}_arm32; shift 1;;
20 |         --arm64 ) imagename=${imagename}:arm64; dockerfile=${dockerfile}_arm64; shift 1;;
21 |         -h | --help | -* | --* | * ) usage;;
22 |     esac
23 | done
24 | if [ "$production" = true ] ; then imagename="yacy/${imagename}"; fi
25 | if [ "$open" = true ] ; then bindhost="0.0.0.0"; callhost=$(hostname); fi
26 |
27 | containerRuns=$(docker ps | grep -i "${containername}" | wc -l )
28 | containerExists=$(docker ps -a | grep -i "${containername}" | wc -l )
29 | if [ "${containerRuns}" -gt 0 ]; then
30 |     echo "${appname} container is already running"
31 | elif [ "${containerExists}" -gt 0 ]; then
32 |     docker start "${containername}"
33 |     echo "${appname} container re-started"
34 | else
35 |     if [[ $imagename != "yacy/"*":latest" ]] && [[ "$(docker images -q "${imagename}" 2> /dev/null)" == "" ]]; then
36 |         cd ..
37 |         docker build -t "${imagename}" -f "${dockerfile}" .
38 |         cd bin
39 |     fi
40 |     docker run -d --restart=unless-stopped -p "${bindhost}:8200:8200" \
41 |         --link yacy-grid-minio --link yacy-grid-rabbitmq --link yacy-grid-elasticsearch --link yacy-grid-mcp \
42 |         -e YACYGRID_GRID_MCP_ADDRESS=yacy-grid-mcp \
43 |         --name "${containername}" "${imagename}"
44 |     echo "${appname} started."
45 | fi
46 | docker ps -a --format "table {{.ID}}\t{{.Image}}\t{{.Names}}\t{{.Mounts}}\t{{.Ports}}"
47 |
48 | echo "To get the app status, open http://${callhost}:8200/yacy/grid/mcp/info/status.json"
49 |
--------------------------------------------------------------------------------
/conf/config.properties:
--------------------------------------------------------------------------------
1 | port = 8200
2 | grid.mcp.address = 127.0.0.1:8100,node00.local:8100,brain.local:8100,searchlab.eu:8100
3 | grid.broker.lazy = true
4 | grid.broker.queue.limit = 0
5 | grid.broker.queue.throttling = 100000
6 | grid.assets.delete = true
7 | grid.loader.disableHeadless = false
8 |
9 | # setting for the user agent type:
10 | # the type is either CUSTOM, YACY, GOOGLE or BROWSER. That means:
11 | # - CUSTOM : use your own user agent. The name must be set in the property grid.loader.userAgentName
12 | # - YACY : use the YaCyBot user agent, e.g. "yacybot (v2 sysinfo) http://yacy.net/bot.html"
13 | # - GOOGLE : use the googlebot user agent, e.g. "Googlebot/2.1 (+http://www.google.com/bot.html)"
14 | # - BROWSER : use a random browser user agent, e.g. "Mozilla/5.0 (Windows NT 5.1; rv:22.0) Gecko/20100101 Firefox/22.0"
15 | grid.loader.userAgentType = BROWSER
16 |
17 |
18 |
19 | ####################################################################
20 | ## The following properties must be identical to those in the MCP ##
21 | ####################################################################
22 |
23 | # The grid name is used to separate different grid networks.
24 | # Only networks with the same name connect with each other
25 | grid.name = freeworld
26 |
27 | # Index names of the grid indexes:
28 | # crawlstart : a history of all crawl starts
29 | # crawler : tracking of crawling progress
30 | # query : a history of all queries
31 | # web : the document search index ("web index", there)
32 | grid.elasticsearch.indexName.crawlstart = crawlstart
33 | grid.elasticsearch.indexName.crawler = crawler
34 | grid.elasticsearch.indexName.query = query
35 | grid.elasticsearch.indexName.web = web
36 |
37 | # the following type name is an intermediate solution to migrate from elastic 6.x to 8.x
38 | # unfortunately the current index type name is 'web' but in future elastic versions the name '_doc'
39 | # is mandatory. We will use this setting until migration to elastic 8.x is complete and delete
40 | # the configuration afterwards.
41 | grid.elasticsearch.typeName = web
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YaCy Grid Component: Loader
2 |
3 | The YaCy Grid is the second-generation implementation of YaCy, a peer-to-peer search engine.
4 | A YaCy Grid installation consists of a set of micro-services which communicate with each other
5 | using the MCP, see https://github.com/yacy/yacy_grid_mcp
6 |
7 | ## Purpose
8 |
9 | The Loader is a microservice which can be deployed e.g. using Docker.
10 | Each search engine needs a file loader and this component will do that work.
11 | The special feature of this loader is its embedded headless browser which makes
12 | it possible to load rich content and provide that content for a search engine.
13 |
14 | ## What it does
15 |
16 | When the Loader component is started, it searches for a MCP and connects to it.
17 | By default the local host is searched for a MCP but you can configure one yourself.
18 |
19 | The Loader will then wait for client requests and perform web loading upon request.
20 | It also has a MCP queue listener to react to loading requests in the working queues.
21 | After loading the content, the loader will push the results back to the MCP storage and put
22 | another message on the MCP message queue to process the loaded content.
23 |
24 | ## Installation: Download, Build, Run
25 | At this time, yacy_grid_loader is not provided in compiled form, but you can easily build it yourself. It's not difficult and done in one minute! The source code is hosted at https://github.com/yacy/yacy_grid_loader, you can download it with:
26 |
27 | > git clone --recursive https://github.com/yacy/yacy_grid_loader.git
28 |
29 | If you just want to make an update, do the following:
30 |
31 | > git pull origin master
32 | > git submodule foreach git pull origin master
33 |
34 | To build and start the loader, run
35 |
36 | > cd yacy_grid_loader
37 | > gradle run
38 |
39 | Please read also https://github.com/yacy/yacy_grid_mcp/blob/master/README.md for further details.
40 |
41 |
42 | ## Contribute
43 |
44 | This is a community project and your contribution is welcome!
45 |
46 | 1. Check for [open issues](https://github.com/yacy/yacy_grid_loader/issues)
47 | or open a fresh one to start a discussion around a feature idea or a bug.
48 | 2. Fork [the repository](https://github.com/yacy/yacy_grid_loader.git)
49 | on GitHub to start making your changes (branch off of the master branch).
50 | 3. Write a test that shows the bug was fixed or the feature works as expected.
51 | 4. Send a pull request and bug us on Gitter until it gets merged and published. :)
52 |
53 |
54 | ## What is the software license?
55 | LGPL 2.1
56 |
57 | Have fun!
58 |
59 | @0rb1t3r
60 |
--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
1 | @rem
2 | @rem Copyright 2015 the original author or authors.
3 | @rem
4 | @rem Licensed under the Apache License, Version 2.0 (the "License");
5 | @rem you may not use this file except in compliance with the License.
6 | @rem You may obtain a copy of the License at
7 | @rem
8 | @rem https://www.apache.org/licenses/LICENSE-2.0
9 | @rem
10 | @rem Unless required by applicable law or agreed to in writing, software
11 | @rem distributed under the License is distributed on an "AS IS" BASIS,
12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | @rem See the License for the specific language governing permissions and
14 | @rem limitations under the License.
15 | @rem
16 |
17 | @if "%DEBUG%" == "" @echo off
18 | @rem ##########################################################################
19 | @rem
20 | @rem Gradle startup script for Windows
21 | @rem
22 | @rem ##########################################################################
23 |
24 | @rem Set local scope for the variables with windows NT shell
25 | if "%OS%"=="Windows_NT" setlocal
26 |
27 | set DIRNAME=%~dp0
28 | if "%DIRNAME%" == "" set DIRNAME=.
29 | set APP_BASE_NAME=%~n0
30 | set APP_HOME=%DIRNAME%
31 |
32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter.
33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
34 |
35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
37 |
38 | @rem Find java.exe
39 | if defined JAVA_HOME goto findJavaFromJavaHome
40 |
41 | set JAVA_EXE=java.exe
42 | %JAVA_EXE% -version >NUL 2>&1
43 | if "%ERRORLEVEL%" == "0" goto execute
44 |
45 | echo.
46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
47 | echo.
48 | echo Please set the JAVA_HOME variable in your environment to match the
49 | echo location of your Java installation.
50 |
51 | goto fail
52 |
53 | :findJavaFromJavaHome
54 | set JAVA_HOME=%JAVA_HOME:"=%
55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
56 |
57 | if exist "%JAVA_EXE%" goto execute
58 |
59 | echo.
60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
61 | echo.
62 | echo Please set the JAVA_HOME variable in your environment to match the
63 | echo location of your Java installation.
64 |
65 | goto fail
66 |
67 | :execute
68 | @rem Setup the command line
69 |
70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
71 |
72 |
73 | @rem Execute Gradle
74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
75 |
76 | :end
77 | @rem End local scope for the variables with windows NT shell
78 | if "%ERRORLEVEL%"=="0" goto mainEnd
79 |
80 | :fail
81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
82 | rem the _cmd.exe /c_ return code!
83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
84 | exit /b 1
85 |
86 | :mainEnd
87 | if "%OS%"=="Windows_NT" endlocal
88 |
89 | :omega
90 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/api/LoaderService.java:
--------------------------------------------------------------------------------
1 | /**
2 | * LoaderService
3 | * Copyright 25.4.2017 by Michael Peter Christen, @0rb1t3r
4 | *
5 | * This library is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU Lesser General Public
7 | * License as published by the Free Software Foundation; either
8 | * version 2.1 of the License, or (at your option) any later version.
9 | *
10 | * This library is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | * Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program in the file lgpl21.txt
17 | * If not, see <http://www.gnu.org/licenses/>.
18 | */
19 |
20 | package net.yacy.grid.loader.api;
21 |
22 | import javax.servlet.http.HttpServletResponse;
23 |
24 | import org.json.JSONArray;
25 | import org.json.JSONObject;
26 |
27 | import ai.susi.mind.SusiAction;
28 | import ai.susi.mind.SusiThought;
29 | import net.yacy.grid.http.APIHandler;
30 | import net.yacy.grid.http.ObjectAPIHandler;
31 | import net.yacy.grid.http.Query;
32 | import net.yacy.grid.http.ServiceResponse;
33 | import net.yacy.grid.loader.retrieval.ContentLoader;
34 |
35 | /**
36 | *
37 | * Test URL:
38 | * http://localhost:8200/yacy/grid/loader/warcloader.warc.gz?url=http://yacy.net
39 | *
40 | * Test command:
41 | * curl -o yacy.net.warc.gz "http://localhost:8200/yacy/grid/loader/warcloader.warc.gz?collection=test&url=http://yacy.net"
42 | * parse this warc with:
43 | * curl -X POST -F "sourcebytes=@yacy.net.warc.gz;type=application/octet-stream" http://127.0.0.1:8500/yacy/grid/parser/parser.json
44 | */
45 | public class LoaderService extends ObjectAPIHandler implements APIHandler {
46 |
47 |     private static final long serialVersionUID = 8578474303031749879L;
48 |     public static final String NAME = "warcloader";
49 |
50 |     /** @return the path under which this handler is served */
51 |     @Override
52 |     public String getAPIPath() {
53 |         return "/yacy/grid/loader/" + NAME + ".warc.gz";
54 |     }
55 |
56 |     /**
57 |      * Load the requested content and answer with the bytes produced by the ContentLoader.
58 |      * @param call the query of the client
59 |      * @param response the servlet response (not used here)
60 |      * @return a response wrapping the loaded bytes
61 |      */
62 |     @Override
63 |     public ServiceResponse serviceImpl(Query call, HttpServletResponse response) {
64 |         // build the same process description that a queue message would carry
65 |         final SusiThought thought = ProcessService.queryToProcess(call);
66 |
67 |         // pull the parameters into locals, this makes debugging easier
68 |         final SusiAction firstAction = thought.getActions().get(0);
69 |         final JSONArray data = thought.getData();
70 |
71 |         // look up the crawl entry for this action; it decides about headless loading
72 |         final String id = firstAction.getStringAttr("id");
73 |         final JSONObject crawlEntry = SusiThought.selectData(data, "id", id);
74 |         final int actionDepth = firstAction.getIntAttr("depth");
75 |         final int crawlDepth = crawlEntry.getInt("crawlingDepth");
76 |         int prio = 0;
77 |         if (crawlEntry.has("priority")) prio = crawlEntry.getInt("priority");
78 |         boolean headless = true;
79 |         if (crawlEntry.has("loaderHeadless")) headless = crawlEntry.getBoolean("loaderHeadless");
80 |
81 |         // run the loader and hand its bytes back to the caller
82 |         final String caller = "api call from " + call.getClientHost();
83 |         final ContentLoader loader = new ContentLoader(firstAction, data, true, caller, id, actionDepth, crawlDepth, headless, prio);
84 |         return new ServiceResponse(loader.getContent());
85 |     }
86 |
87 | }
88 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/retrieval/JavaHttpClient.java:
--------------------------------------------------------------------------------
1 | package net.yacy.grid.loader.retrieval;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.IOException;
5 | import java.io.InputStream;
6 | import java.io.InputStreamReader;
7 | import java.net.HttpURLConnection;
8 | import java.net.URL;
9 | import java.nio.charset.StandardCharsets;
10 | import java.util.List;
11 | import java.util.Map;
12 |
13 | import org.apache.http.Header;
14 | import org.apache.http.RequestLine;
15 |
16 | import net.yacy.grid.http.ClientConnection;
17 | import net.yacy.grid.http.ClientIdentification;
18 |
19 | public class JavaHttpClient implements HttpClient {
20 |
21 | private static final String CRLF = new String(ClientConnection.CRLF, StandardCharsets.US_ASCII);
22 | private static String userAgentDefault = ClientIdentification.browserAgent.userAgent;
23 |
24 | private int status_code;
25 | private String mime;
26 | private Map> header;
27 | private String requestHeader, responseHeader;
28 | private byte[] content;
29 |
30 | public static void initClient(String userAgent) {
31 | userAgentDefault = userAgent;
32 | }
33 |
34 | public JavaHttpClient(String url, boolean head) throws IOException {
35 |
36 | HttpURLConnection connection = ((HttpURLConnection) new URL(url).openConnection());
37 | if (head) connection.setRequestMethod("HEAD");
38 | connection.addRequestProperty("User-Agent", userAgentDefault);
39 |
40 | // compute the request header (we do this to have a documentation later of what we did)
41 | Map> map = connection.getRequestProperties();
42 | StringBuffer sb = new StringBuffer();
43 | String special = connection.getHeaderField(0);
44 | sb.append(connection.getRequestMethod() + " " + url).append(CRLF);
45 | for (Map.Entry> entry: connection.getRequestProperties().entrySet()) {
46 | String key = entry.getKey();
47 | for (String value: entry.getValue()) {
48 | sb.append(key).append(": ").append(value).append(CRLF);
49 | }
50 | }
51 | sb.append(CRLF);
52 | this.requestHeader = sb.toString();
53 |
54 |
55 | InputStream input;
56 | if (connection.getResponseCode() == 200) // this must be called before 'getErrorStream()' works
57 | input = connection.getInputStream();
58 | else input = connection.getErrorStream();
59 | BufferedReader reader = new BufferedReader(new InputStreamReader(input));
60 | String msg;
61 | while ((msg =reader.readLine()) != null)
62 | System.out.println(msg);
63 | }
64 |
65 |
66 | @Override
67 | public int getStatusCode() {
68 | return status_code;
69 | }
70 |
71 | @Override
72 | public String getMime() {
73 | return mime;
74 | }
75 |
76 | @Override
77 | public Map> getHeader() {
78 | return header;
79 | }
80 |
81 | @Override
82 | public String getRequestHeader() {
83 | return requestHeader;
84 | }
85 |
86 | @Override
87 | public String getResponseHeader() {
88 | return responseHeader;
89 | }
90 |
91 | @Override
92 | public byte[] getContent() {
93 | return this.content;
94 | }
95 |
96 | public static void main(String[] args) {
97 | try {
98 | JavaHttpClient client = new JavaHttpClient("https://krefeld.polizei.nrw/", true);
99 | } catch (IOException e) {
100 | e.printStackTrace();
101 | }
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/JwatWarcWriter.java:
--------------------------------------------------------------------------------
1 | /**
2 | * JwatWarcWriter
3 | * Copyright 11.5.2017 by Michael Peter Christen, @0rb1t3r
4 | *
5 | * This library is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU Lesser General Public
7 | * License as published by the Free Software Foundation; either
8 | * version 2.1 of the License, or (at your option) any later version.
9 | *
10 | * This library is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | * Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program in the file lgpl21.txt
17 | * If not, see <http://www.gnu.org/licenses/>.
18 | */
19 |
20 | package net.yacy.grid.loader;
21 |
22 | import java.io.ByteArrayInputStream;
23 | import java.io.IOException;
24 | import java.security.MessageDigest;
25 | import java.security.NoSuchAlgorithmException;
26 | import java.util.Date;
27 |
28 | import org.apache.commons.codec.binary.Base32;
29 | import org.jwat.warc.WarcRecord;
30 | import org.jwat.warc.WarcWriter;
31 |
32 | import net.yacy.grid.tools.DateParser;
33 | import net.yacy.grid.tools.Logger;
34 |
35 | /**
36 | * for a documentation, see https://iipc.github.io/warc-specifications/specifications/warc-format/warc-1.1/
37 | * @author admin
38 | *
39 | */
40 | public class JwatWarcWriter {
41 |
42 |     /**
43 |      * Write a 'warcinfo' record.
44 |      * @param writer the WARC writer which receives the record
45 |      * @param date value of the WARC-Date field
46 |      * @param warcinfo_uuid uuid of this record; if null no WARC-Record-ID field is written
47 |      * @param filename value of the WARC-Filename field; if null the field is omitted
48 |      * @param payload the record block, declared as application/warc-fields
49 |      * @throws IOException if writing fails, also if the writer hits the java 8 NoSuchMethodError described below
50 |      */
51 |     public static void writeWarcinfo(final WarcWriter writer, final Date date, final String warcinfo_uuid, final String filename, final byte[] payload) throws IOException {
52 |         try {
53 |             final WarcRecord record = WarcRecord.createRecord(writer);
54 |             record.header.addHeader("WARC-Type", "warcinfo");
55 |             if (warcinfo_uuid != null) record.header.addHeader("WARC-Record-ID", urnUuid(warcinfo_uuid));
56 |             record.header.addHeader("WARC-Date", DateParser.iso8601Format.format(date));
57 |             if (filename != null) record.header.addHeader("WARC-Filename", filename);
58 |             record.header.addHeader("Content-Length", Long.toString(payload.length));
59 |             record.header.addHeader("Content-Type", "application/warc-fields");
60 |             writer.writeHeader(record);
61 |             final ByteArrayInputStream inBytes = new ByteArrayInputStream(payload);
62 |             writer.streamPayload(inBytes);
63 |             writer.closeRecord(); // java.lang.NoSuchMethodError: java.nio.ByteBuffer.flip()Ljava/nio/ByteBuffer;
64 |         } catch (final NoSuchMethodError e) {
65 |             // the writer may fail because of a java 8 class error
66 |             /*
67 |             java.lang.NoSuchMethodError: java.nio.ByteBuffer.flip()Ljava/nio/ByteBuffer;
68 |             at org.jwat.gzip.GzipWriter$GzipEntryOutputStream.close(GzipWriter.java:513)
69 |             at org.jwat.gzip.GzipEntry.close(GzipEntry.java:142)
70 |             at org.jwat.warc.WarcWriterCompressed.closeRecord(WarcWriterCompressed.java:100)
71 |             */
72 |             Logger.warn(e);
73 |             // same message as before, but keep the cause for the stack trace
74 |             throw new IOException(e.getMessage(), e);
75 |         }
76 |     }
77 |
78 |     /**
79 |      * Write a 'request' record.
80 |      * @param writer the WARC writer which receives the record
81 |      * @param url value of the WARC-Target-URI field
82 |      * @param ip value of the WARC-IP-Address field; if null the field is omitted
83 |      * @param date value of the WARC-Date field
84 |      * @param warcrecord_uuid uuid of this record; if null no WARC-Record-ID field is written
85 |      * @param warcinfo_uuid uuid of the related warcinfo record; if null no WARC-Warcinfo-ID field is written
86 |      * @param payload the record block, declared as application/http;msgtype=request
87 |      * @throws IOException if writing fails
88 |      */
89 |     public static void writeRequest(final WarcWriter writer, final String url, final String ip, final Date date, final String warcrecord_uuid, final String warcinfo_uuid, final byte[] payload) throws IOException {
90 |         final WarcRecord record = WarcRecord.createRecord(writer);
91 |         record.header.addHeader("WARC-Type", "request");
92 |         record.header.addHeader("WARC-Target-URI", url);
93 |         record.header.addHeader("Content-Type", "application/http;msgtype=request");
94 |         record.header.addHeader("WARC-Date", DateParser.iso8601Format.format(date));
95 |         if (warcrecord_uuid != null) record.header.addHeader("WARC-Record-ID", urnUuid(warcrecord_uuid));
96 |         if (ip != null) record.header.addHeader("WARC-IP-Address", ip);
97 |         if (warcinfo_uuid != null) record.header.addHeader("WARC-Warcinfo-ID", urnUuid(warcinfo_uuid));
98 |         //record.header.addHeader("WARC-Block-Digest", "sha1:" + sha1(payload));
99 |         record.header.addHeader("Content-Length", Long.toString(payload.length));
100 |         writer.writeHeader(record);
101 |         final ByteArrayInputStream inBytes = new ByteArrayInputStream(payload);
102 |         writer.streamPayload(inBytes);
103 |         writer.closeRecord();
104 |     }
105 |
106 |     /**
107 |      * Write a 'response' record.
108 |      * @param writer the WARC writer which receives the record
109 |      * @param url value of the WARC-Target-URI field
110 |      * @param ip value of the WARC-IP-Address field; if null the field is omitted
111 |      * @param date value of the WARC-Date field
112 |      * @param warcrecord_uuid uuid of this record; if null no WARC-Record-ID field is written
113 |      * @param warcinfo_uuid uuid of the related warcinfo record; if null no WARC-Warcinfo-ID field is written
114 |      * @param payload the record block, declared as application/http;msgtype=response
115 |      * @throws IOException if writing fails
116 |      */
117 |     public static void writeResponse(final WarcWriter writer, final String url, final String ip, final Date date, final String warcrecord_uuid, final String warcinfo_uuid, final byte[] payload) throws IOException {
118 |         final WarcRecord record = WarcRecord.createRecord(writer);
119 |         record.header.addHeader("WARC-Type", "response");
120 |         if (warcrecord_uuid != null) record.header.addHeader("WARC-Record-ID", urnUuid(warcrecord_uuid));
121 |         if (warcinfo_uuid != null) record.header.addHeader("WARC-Warcinfo-ID", urnUuid(warcinfo_uuid));
122 |         record.header.addHeader("WARC-Target-URI", url);
123 |         record.header.addHeader("WARC-Date", DateParser.iso8601Format.format(date));
124 |         if (ip != null) record.header.addHeader("WARC-IP-Address", ip);
125 |         //record.header.addHeader("WARC-Block-Digest", "sha1:" + sha1(payload));
126 |         //record.header.addHeader("WARC-Payload-Digest", "sha1:" + sha1(payload));
127 |         record.header.addHeader("Content-Type", "application/http;msgtype=response");
128 |         record.header.addHeader("Content-Length", Long.toString(payload.length));
129 |         writer.writeHeader(record);
130 |         final ByteArrayInputStream inBytes = new ByteArrayInputStream(payload);
131 |         writer.streamPayload(inBytes);
132 |         writer.closeRecord();
133 |     }
134 |
135 |     /**
136 |      * Wrap a uuid into the bracketed urn form used for WARC record ids.
137 |      * @param uuid the plain uuid string
138 |      * @return the uuid as "<urn:uuid:...>"
139 |      */
140 |     private static String urnUuid(final String uuid) {
141 |         return "<urn:uuid:" + uuid + ">";
142 |     }
143 |
144 |     /**
145 |      * compute a sha1 in base32 format
146 |      * We chose that format, because WGET does the same
147 |      * @param b the bytes to be hashed
148 |      * @return a base32 string for the sha1 of the input, or an empty string if SHA-1 is not available
149 |      */
150 |     public static String sha1(final byte[] b) {
151 |         try {
152 |             final MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
153 |             // encode the digest, not the input bytes
154 |             return new Base32().encodeAsString(sha1.digest(b));
155 |         } catch (final NoSuchAlgorithmException e) {
156 |             e.printStackTrace();
157 |             return "";
158 |         }
159 |     }
160 | }
161 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/api/ProcessService.java:
--------------------------------------------------------------------------------
1 | /**
2 | * ProcessService
3 | * Copyright 25.4.2017 by Michael Peter Christen, @0rb1t3r
4 | *
5 | * This library is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU Lesser General Public
7 | * License as published by the Free Software Foundation; either
8 | * version 2.1 of the License, or (at your option) any later version.
9 | *
10 | * This library is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | * Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program in the file lgpl21.txt
17 | * If not, see <http://www.gnu.org/licenses/>.
18 | */
19 |
20 | package net.yacy.grid.loader.api;
21 |
22 | import java.io.IOException;
23 |
24 | import javax.servlet.http.HttpServletResponse;
25 |
26 | import org.json.JSONArray;
27 | import org.json.JSONObject;
28 |
29 | import ai.susi.mind.SusiAction;
30 | import ai.susi.mind.SusiAction.RenderType;
31 | import ai.susi.mind.SusiThought;
32 | import net.yacy.grid.http.APIHandler;
33 | import net.yacy.grid.http.ObjectAPIHandler;
34 | import net.yacy.grid.http.Query;
35 | import net.yacy.grid.http.ServiceResponse;
36 | import net.yacy.grid.loader.retrieval.ContentLoader;
37 | import net.yacy.grid.mcp.Service;
38 |
39 | /**
40 | *
41 | * Test URL:
42 | * http://localhost:8200/yacy/grid/loader/warcloader.warc.gz?url=http://yacy.net
43 | *
44 | * Test command:
45 | * curl "http://localhost:8200/yacy/grid/loader/warcprocess.json?collection=test&targetasset=test/yacy.net.warc.gz&url=http://yacy.net"
46 | * places the warc file on the asset store
47 | */
48 | public class ProcessService extends ObjectAPIHandler implements APIHandler {
49 |
50 | private static final long serialVersionUID = 8578474303031749879L;
51 | public static final String NAME = "warcprocess";
52 |
53 | @Override
54 | public String getAPIPath() {
55 | return "/yacy/grid/loader/" + NAME + ".json";
56 | }
57 |
58 | @Override
59 | public ServiceResponse serviceImpl(final Query call, final HttpServletResponse response) {
60 | // construct the same process as if it was submitted on a queue
61 | final SusiThought process = queryToProcess(call);
62 | final SusiAction action = process.getActions().iterator().next();
63 | final JSONArray data = process.getData();
64 |
65 | // find out if we should do headless loading
66 | final String crawlID = action.getStringAttr("id");
67 | final JSONObject crawl = SusiThought.selectData(data, "id", crawlID);
68 | final int depth = action.getIntAttr("depth");
69 | final int crawlingDepth = crawl.getInt("crawlingDepth");
70 | final int priority = crawl.has("priority") ? crawl.getInt("priority") : 0;
71 | final boolean loaderHeadless = crawl.has("loaderHeadless") ? crawl.getBoolean("loaderHeadless") : true;
72 |
73 | // construct a WARC
74 | final String targetasset = process.getObservation("targetasset");
75 | final ContentLoader cl = new ContentLoader(
76 | process.getActions().get(0), process.getData(), targetasset.endsWith(".gz"), "api call from " + call.getClientHost(),
77 | crawlID, depth, crawlingDepth, loaderHeadless, priority);
78 | final byte[] b = cl.getContent();
79 |
80 | // store the WARC as asset if wanted
81 | final JSONObject json = new JSONObject(true);
82 | if (targetasset != null && targetasset.length() > 0) {
83 | try {
84 | Service.instance.config.gridStorage.store(targetasset, b);
85 | json.put(ObjectAPIHandler.SUCCESS_KEY, true);
86 | json.put(ObjectAPIHandler.COMMENT_KEY, "asset stored");
87 | } catch (final IOException e) {
88 | e.printStackTrace();
89 | json.put(ObjectAPIHandler.SUCCESS_KEY, false);
90 | json.put(ObjectAPIHandler.COMMENT_KEY, e.getMessage());
91 | }
92 | } else {
93 | json.put(ObjectAPIHandler.SUCCESS_KEY, false);
94 | json.put(ObjectAPIHandler.COMMENT_KEY, "this process requires a 'targetasset' attribute");
95 | }
96 | return new ServiceResponse(json);
97 | }
98 |
99 | public static SusiThought queryToProcess(final Query call) {
100 | // read query attributes
101 | final String id = call.get("id", "*id*"); // the crawl id
102 | String url = call.get("url", "");
103 | final int urlCount = call.get("urlCount", 0);
104 | final int depth = call.get("depth", 0);
105 | final int crawlingDepth = call.get("crawlingDepth", 0); // the maximum depth for the crawl start of this domain
106 | final boolean loaderHeadless = call.get("loaderHeadless", false);
107 | final int priority = call.get("priority", 0);
108 | final String collection = call.get("collection", "");
109 | final String targetasset = call.get("targetasset", "");
110 |
111 | // construct an object that could be taken from the queue server
112 | final SusiThought process = new SusiThought();
113 | process.setProcess("yacy_grid_loader");
114 | if (collection.length() > 0) process.addObservation("collection", collection);
115 |
116 | final JSONObject crawl = new JSONObject();
117 | crawl.put("id", id);
118 | crawl.put("start_url", url);
119 | crawl.put("crawlingDepth", crawlingDepth);
120 | crawl.put("priority", priority);
121 | crawl.put("loaderHeadless", loaderHeadless);
122 |
123 | // create action
124 | final JSONObject action = new JSONObject();
125 | final JSONArray urls = new JSONArray();
126 | if (url.length() > 0) urls.put(url);
127 | if (urlCount > 0) for (int i = 0; i < urlCount; i++) {
128 | url = call.get("url_" + i, "");
129 | if (url.length() > 0) urls.put(url);
130 | }
131 | action.put("id", id);
132 | action.put("type", RenderType.loader.name());
133 | action.put("queue", "loader");
134 | action.put("urls", urls);
135 | action.put("depth", depth);
136 | if (collection.length() > 0) action.put("collection", collection);
137 | if (targetasset.length() > 0) action.put("targetasset", targetasset);
138 | process.addAction(new SusiAction(action));
139 | process.setData(new JSONArray().put(crawl));
140 |
141 | return process;
142 | }
143 |
144 | }
145 |
146 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/Loader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Loader
3 | * Copyright 25.04.2017 by Michael Peter Christen, @0rb1t3r
4 | *
5 | * This library is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU Lesser General Public
7 | * License as published by the Free Software Foundation; either
8 | * version 2.1 of the License, or (at your option) any later version.
9 | *
10 | * This library is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | * Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program in the file lgpl21.txt
17 | * If not, see <http://www.gnu.org/licenses/>.
18 | */
19 |
20 | package net.yacy.grid.loader;
21 |
22 | import java.util.ArrayList;
23 | import java.util.Arrays;
24 | import java.util.List;
25 |
26 | import javax.servlet.Servlet;
27 |
28 | import net.yacy.grid.YaCyServices;
29 | import net.yacy.grid.http.ClientConnection;
30 | import net.yacy.grid.http.ClientIdentification;
31 | import net.yacy.grid.loader.api.LoaderService;
32 | import net.yacy.grid.loader.api.ProcessService;
33 | import net.yacy.grid.loader.retrieval.LoaderClientConnection;
34 | import net.yacy.grid.mcp.BrokerListener;
35 | import net.yacy.grid.mcp.Configuration;
36 | import net.yacy.grid.mcp.MCP;
37 | import net.yacy.grid.mcp.Service;
38 | import net.yacy.grid.tools.CronBox;
39 | import net.yacy.grid.tools.CronBox.Telemetry;
40 | import net.yacy.grid.tools.Logger;
41 |
42 | /**
43 | * The Loader main class
44 | *
45 | * performance debugging:
46 | * http://localhost:8200/yacy/grid/mcp/info/threaddump.txt
47 | * http://localhost:8200/yacy/grid/mcp/info/threaddump.txt?count=100 *
48 | */
49 | public class Loader {
50 |
51 | private final static YaCyServices LOADER_SERVICE = YaCyServices.loader; // check with http://localhost:8200/yacy/grid/mcp/status.json
52 | private final static String DATA_PATH = "data";
53 |
54 | // define services
55 | @SuppressWarnings("unchecked")
56 | public final static Class extends Servlet>[] LOADER_SERVICES = new Class[]{
57 | // app services
58 | LoaderService.class,
59 | ProcessService.class
60 | };
61 |
62 | public static class Application implements CronBox.Application {
63 |
64 | final Configuration config;
65 | final Service service;
66 | final BrokerListener brokerApplication;
67 | final CronBox.Application serviceApplication;
68 |
69 | public Application() {
70 | Logger.info("Starting Crawler Application...");
71 |
72 | // initialize configuration
73 | final List> services = new ArrayList<>();
74 | services.addAll(Arrays.asList(MCP.MCP_SERVLETS));
75 | services.addAll(Arrays.asList(LOADER_SERVICES));
76 | this.config = new Configuration(DATA_PATH, true, LOADER_SERVICE, services.toArray(new Class[services.size()]));
77 |
78 | // initialize loader with user agent
79 | String userAgent = ClientIdentification.getAgent(ClientIdentification.googleAgentName/*.yacyInternetCrawlerAgentName*/).userAgent;
80 | String userAgentType = this.config.properties.get("grid.loader.userAgentType");
81 | if (userAgentType == null || userAgentType.length() == 0) userAgentType = "BROWSER";
82 | if ("CUSTOM".equals(userAgentType)) userAgent = this.config.properties.get("grid.lodeer.userAgentName");
83 | else if ("YACY".equals(userAgentType)) userAgent = ClientIdentification.yacyInternetCrawlerAgent.userAgent;
84 | else if ("GOOGLE".equals(userAgentType)) userAgent = ClientIdentification.getAgent(ClientIdentification.googleAgentName).userAgent;
85 | else userAgent = ClientIdentification.getAgent(ClientIdentification.browserAgentName).userAgent;
86 | LoaderClientConnection.userAgent = userAgent;
87 |
88 | // initialize REST server with services
89 | this.service = new Service(this.config);
90 |
91 | // connect backend
92 | this.config.connectBackend();
93 |
94 | // initiate broker application: listening to indexing requests at RabbitMQ
95 | final boolean disableHeadless = this.config.properties.containsKey("grid.loader.disableHeadless") ? Boolean.parseBoolean(this.config.properties.get("grid.loader.disableHeadless")) : false;
96 | this.brokerApplication = new LoaderListener(LOADER_SERVICE, disableHeadless);
97 |
98 | // initiate service application: listening to REST request
99 | this.serviceApplication = this.service.newServer(null);
100 | }
101 |
102 | @Override
103 | public void run() {
104 |
105 | Logger.info("Grid Name: " + this.config.properties.get("grid.name"));
106 |
107 | // starting threads
108 | new Thread(this.brokerApplication).start();
109 | this.serviceApplication.run(); // SIC! the service application is running as the core element of this run() process. If we run it concurrently, this runnable will be "dead".
110 | }
111 |
112 | @Override
113 | public void stop() {
114 | Logger.info("Stopping MCP Application...");
115 | this.serviceApplication.stop();
116 | this.brokerApplication.stop();
117 | this.service.stop();
118 | this.service.close();
119 | this.config.close();
120 | }
121 |
122 | @Override
123 | public Telemetry getTelemetry() {
124 | return null;
125 | }
126 |
127 | }
128 |
129 | public static void main(final String[] args) {
130 | // run in headless mode
131 | System.setProperty("java.awt.headless", "true"); // no awt used here so we can switch off that stuff
132 |
133 | // Debug Info
134 | boolean assertionenabled = false;
135 | assert (assertionenabled = true) == true; // compare to true to remove warning: "Possible accidental assignement"
136 | if (assertionenabled) Logger.info("Asserts are enabled");
137 |
138 | // first greeting
139 | Logger.info("YaCy Grid Loader started!");
140 |
141 | // run application with cron
142 | final long cycleDelay = Long.parseLong(System.getProperty("YACYGRID_LOADER_CYCLEDELAY", "" + Long.MAX_VALUE)); // by default, run only in one genesis thread
143 | final int cycleRandom = Integer.parseInt(System.getProperty("YACYGRID_LOADER_CYCLERANDOM", "" + 1000 * 60 /*1 minute*/));
144 | final CronBox cron = new CronBox(Application.class, cycleDelay, cycleRandom);
145 | cron.cycle();
146 |
147 | // this line is reached if the cron process was shut down
148 | Logger.info("YaCy Grid Loader terminated");
149 | }
150 |
151 | }
152 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/LoaderListener.java:
--------------------------------------------------------------------------------
1 | /**
2 | * LoaderListener
3 | * Copyright 25.04.2017 by Michael Peter Christen, @0rb1t3r
4 | *
5 | * This library is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU Lesser General Public
7 | * License as published by the Free Software Foundation; either
8 | * version 2.1 of the License, or (at your option) any later version.
9 | *
10 | * This library is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | * Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program in the file lgpl21.txt
17 | * If not, see <http://www.gnu.org/licenses/>.
18 | */
19 |
20 | package net.yacy.grid.loader;
21 |
22 | import org.json.JSONArray;
23 | import org.json.JSONObject;
24 |
25 | import ai.susi.mind.SusiAction;
26 | import ai.susi.mind.SusiThought;
27 | import net.yacy.grid.YaCyServices;
28 | import net.yacy.grid.loader.retrieval.ContentLoader;
29 | import net.yacy.grid.mcp.AbstractBrokerListener;
30 | import net.yacy.grid.mcp.BrokerListener;
31 | import net.yacy.grid.mcp.Service;
32 | import net.yacy.grid.tools.CronBox.Telemetry;
33 | import net.yacy.grid.tools.Logger;
34 | import net.yacy.grid.tools.Memory;
35 |
36 | /**
37 | * broker listener, takes process messages from the queue "loader", "webloader"
38 | * i.e. test with:
39 | * curl -X POST -F "message=@job.json" -F "serviceName=loader" -F "queueName=webloader" http://yacygrid.com:8100/yacy/grid/mcp/messages/send.json
40 | * where job.json is:
41 | {
42 | "metadata": {
43 | "process": "yacy_grid_loader",
44 | "count": 1
45 | },
46 | "data": [{
47 | "crawlingMode": "url",
48 | "crawlingURL": "http://yacy.net",
49 | "sitemapURL": "",
50 | "crawlingFile": "",
51 | "crawlingDepth": 3,
52 | "crawlingDepthExtension": "",
53 | "range": "domain",
54 | "mustmatch": ".*",
55 | "mustnotmatch": "",
56 | "ipMustmatch": ".*",
57 | "ipMustnotmatch": "",
58 | "indexmustmatch": ".*",
59 | "indexmustnotmatch": "",
60 | "deleteold": "off",
61 | "deleteIfOlderNumber": 0,
62 | "deleteIfOlderUnit": "day",
63 | "recrawl": "nodoubles",
64 | "reloadIfOlderNumber": 0,
65 | "reloadIfOlderUnit": "day",
66 | "crawlingDomMaxCheck": "off",
67 | "crawlingDomMaxPages": 1000,
68 | "crawlingQ": "off",
69 | "cachePolicy": "if fresh",
70 | "collection": "user",
71 | "agentName": "yacybot (yacy.net; crawler from yacygrid.com)",
72 | "user": "anonymous@nowhere.com",
73 | "client": "yacygrid.com"
74 | }],
75 | "actions": [{
76 | "type": "loader",
77 | "queue": "webloader",
78 | "urls": ["http://yacy.net"],
79 | "collection": "test",
80 | "targetasset": "test3/yacy.net.warc.gz",
81 | "actions": [{
82 | "type": "parser",
83 | "queue": "yacyparser",
84 | "sourceasset": "test3/yacy.net.warc.gz",
85 | "targetasset": "test3/yacy.net.jsonlist",
86 | "targetgraph": "test3/yacy.net.graph.json",
87 | "actions": [{
88 | "type": "indexer",
89 | "queue": "elasticsearch",
90 | "sourceasset": "test3/yacy.net.jsonlist"
91 | },{
92 | "type": "crawler",
93 | "queue": "webcrawler",
94 | "sourceasset": "test3/yacy.net.graph.json"
95 | }
96 | ]
97 | }]
98 | }]
99 | }
100 | *
101 | * to check the queue content, see http://www.searchlab.eu:15672/
102 | */
103 | public class LoaderListener extends AbstractBrokerListener implements BrokerListener {
104 |
105 | private final boolean disableHeadless;
106 |
107 | public LoaderListener(final YaCyServices service, final boolean disableHeadless) {
108 | super(Service.instance.config, service, Runtime.getRuntime().availableProcessors());
109 | this.disableHeadless = disableHeadless;
110 | }
111 |
112 | @Override
113 | public ActionResult processAction(final SusiAction action, final JSONArray processData, final String processName, final int processNumber) {
114 |
115 | // check short memory status
116 | if (Memory.shortStatus()) {
117 | Logger.info(this.getClass(), "Loader short memory status: assigned = " + Memory.assigned() + ", used = " + Memory.used());
118 | }
119 |
120 | // find out if we should do headless loading
121 | final String crawlID = action.getStringAttr("id");
122 | if (crawlID == null || crawlID.length() == 0) {
123 | Logger.info(this.getClass(), "Loader.processAction Fail: Action does not have an id: " + action.toString());
124 | return ActionResult.FAIL_IRREVERSIBLE;
125 | }
126 | final JSONObject crawl = SusiThought.selectData(processData, "id", crawlID);
127 | if (crawl == null) {
128 | Logger.info(this.getClass(), "Loader.processAction Fail: ID of Action not found in data: " + action.toString());
129 | return ActionResult.FAIL_IRREVERSIBLE;
130 | }
131 | final int depth = action.getIntAttr("depth");
132 | final int crawlingDepth = crawl.getInt("crawlingDepth");
133 | final int priority = crawl.has("priority") ? crawl.getInt("priority") : 0;
134 | boolean loaderHeadless = crawl.has("loaderHeadless") ? crawl.getBoolean("loaderHeadless") : true;
135 | if (this.disableHeadless) loaderHeadless = false;
136 |
137 | final String targetasset = action.getStringAttr("targetasset");
138 | final boolean archivewarc = action.getBooleanAttr("archivewarc");
139 | final String threadnameprefix = processName + "-" + processNumber;
140 | Thread.currentThread().setName(threadnameprefix + " targetasset=" + targetasset);
141 | if (targetasset != null && targetasset.length() > 0) {
142 | ActionResult actionResult = ActionResult.SUCCESS;
143 | final byte[] b;
144 | try {
145 | final ContentLoader cl = new ContentLoader(action, processData, targetasset.endsWith(".gz"), threadnameprefix, crawlID, depth, crawlingDepth, loaderHeadless, priority);
146 | b = cl.getContent();
147 | actionResult = cl.getResult();
148 | } catch (final Throwable e) {
149 | Logger.warn(this.getClass(), e);
150 | return ActionResult.FAIL_IRREVERSIBLE;
151 | }
152 | if (actionResult == ActionResult.FAIL_IRREVERSIBLE) {
153 | Logger.info(this.getClass(), "Loader.processAction FAILED processed message for targetasset " + targetasset);
154 | return actionResult;
155 | }
156 | Logger.info(this.getClass(), "Loader.processAction SUCCESS processed message for targetasset " + targetasset);
157 | boolean storeToMessage = true; // debug version for now: always true TODO: set to false later
158 | // ATTENTION: we should not send binaries larger than 512MB to RabbitMQ, see https://github.com/rabbitmq/rabbitmq-server/issues/147#issuecomment-470882099
159 | if (!storeToMessage || (archivewarc && Service.instance.config.gridStorage.isS3Connected())) {
160 | try {
161 | Service.instance.config.gridStorage.store(targetasset, b);
162 | Logger.info(this.getClass(), "Loader.processAction stored asset " + targetasset);
163 | } catch (final Throwable e) {
164 | Logger.warn(this.getClass(), "Loader.processAction asset " + targetasset + " could not be stored, carrying the asset within the next action", e);
165 | storeToMessage = true;
166 | }
167 | }
168 | if (storeToMessage) {
169 | final JSONArray actions = action.getEmbeddedActions();
170 | actions.forEach(a ->
171 | new SusiAction((JSONObject) a).setBinaryAsset(targetasset, b)
172 | );
173 | Logger.info(this.getClass(), "Loader.processAction stored asset " + targetasset + " into message");
174 | }
175 | Logger.info(this.getClass(), "Loader.processAction processed message from queue and stored asset " + targetasset);
176 |
177 | // success (has done something)
178 | return actionResult;
179 | }
180 |
181 | // fail (nothing done)
182 | return ActionResult.FAIL_IRREVERSIBLE;
183 | }
184 |
185 | @Override
186 | public Telemetry getTelemetry() {
187 | return null;
188 | }
189 | }
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | #
4 | # Copyright © 2015-2021 the original authors.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # https://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | ##############################################################################
20 | #
21 | # Gradle start up script for POSIX generated by Gradle.
22 | #
23 | # Important for running:
24 | #
25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
26 | # noncompliant, but you have some other compliant shell such as ksh or
27 | # bash, then to run this script, type that shell name before the whole
28 | # command line, like:
29 | #
30 | # ksh Gradle
31 | #
32 | # Busybox and similar reduced shells will NOT work, because this script
33 | # requires all of these POSIX shell features:
34 | # * functions;
35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»;
37 | # * compound commands having a testable exit status, especially «case»;
38 | # * various built-in commands including «command», «set», and «ulimit».
39 | #
40 | # Important for patching:
41 | #
42 | # (2) This script targets any POSIX shell, so it avoids extensions provided
43 | # by Bash, Ksh, etc; in particular arrays are avoided.
44 | #
45 | # The "traditional" practice of packing multiple parameters into a
46 | # space-separated string is a well documented source of bugs and security
47 | # problems, so this is (mostly) avoided, by progressively accumulating
48 | # options in "$@", and eventually passing that to Java.
49 | #
50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
52 | # see the in-line comments for details.
53 | #
54 | # There are tweaks for specific operating systems such as AIX, CygWin,
55 | # Darwin, MinGW, and NonStop.
56 | #
57 | # (3) This script is generated from the Groovy template
58 | # https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
59 | # within the Gradle project.
60 | #
61 | # You can find Gradle at https://github.com/gradle/gradle/.
62 | #
63 | ##############################################################################
64 |
65 | # Attempt to set APP_HOME
66 |
67 | # Resolve links: $0 may be a link
68 | app_path=$0
69 |
70 | # Need this for daisy-chained symlinks.
71 | while
72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
73 | [ -h "$app_path" ]
74 | do
75 | ls=$( ls -ld "$app_path" )
76 | link=${ls#*' -> '}
77 | case $link in #(
78 | /*) app_path=$link ;; #(
79 | *) app_path=$APP_HOME$link ;;
80 | esac
81 | done
82 |
83 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
84 |
85 | APP_NAME="Gradle"
86 | APP_BASE_NAME=${0##*/}
87 |
88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
90 |
91 | # Use the maximum available, or set MAX_FD != -1 to use that value.
92 | MAX_FD=maximum
93 |
# Print all arguments, joined into one line, to standard error.
warn () {
    echo "$*" >&2
}
97 |
# Print all arguments to standard error, framed by empty lines, then exit with status 1.
die () {
    {
        echo
        echo "$*"
        echo
    } >&2
    exit 1
}
104 |
105 | # OS specific support (must be 'true' or 'false').
106 | cygwin=false
107 | msys=false
108 | darwin=false
109 | nonstop=false
110 | case "$( uname )" in #(
111 | CYGWIN* ) cygwin=true ;; #(
112 | Darwin* ) darwin=true ;; #(
113 | MSYS* | MINGW* ) msys=true ;; #(
114 | NONSTOP* ) nonstop=true ;;
115 | esac
116 |
117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
118 |
119 |
# Determine the Java command to use to start the JVM.
# With JAVA_HOME set, the executable is taken from there and must exist;
# otherwise a 'java' command must be found in PATH.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD=$JAVA_HOME/jre/sh/java
    else
        JAVACMD=$JAVA_HOME/bin/java
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD=java
    # 'which' is not part of POSIX; 'command' is a built-in this script already requires
    if ! command -v java >/dev/null 2>&1
    then
        die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
fi
141 |
142 | # Increase the maximum file descriptors if we can.
143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
144 | case $MAX_FD in #(
145 | max*)
146 | MAX_FD=$( ulimit -H -n ) ||
147 | warn "Could not query maximum file descriptor limit"
148 | esac
149 | case $MAX_FD in #(
150 | '' | soft) :;; #(
151 | *)
152 | ulimit -n "$MAX_FD" ||
153 | warn "Could not set maximum file descriptor limit to $MAX_FD"
154 | esac
155 | fi
156 |
157 | # Collect all arguments for the java command, stacking in reverse order:
158 | # * args from the command line
159 | # * the main class name
160 | # * -classpath
161 | # * -D...appname settings
162 | # * --module-path (only if needed)
163 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
164 |
165 | # For Cygwin or MSYS, switch paths to Windows format before running java
166 | if "$cygwin" || "$msys" ; then
167 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
168 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
169 |
170 | JAVACMD=$( cygpath --unix "$JAVACMD" )
171 |
172 | # Now convert the arguments - kludge to limit ourselves to /bin/sh
173 | for arg do
174 | if
175 | case $arg in #(
176 | -*) false ;; # don't mess with options #(
177 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
178 | [ -e "$t" ] ;; #(
179 | *) false ;;
180 | esac
181 | then
182 | arg=$( cygpath --path --ignore --mixed "$arg" )
183 | fi
184 | # Roll the args list around exactly as many times as the number of
185 | # args, so each arg winds up back in the position where it started, but
186 | # possibly modified.
187 | #
188 | # NB: a `for` loop captures its iteration list before it begins, so
189 | # changing the positional parameters here affects neither the number of
190 | # iterations, nor the values presented in `arg`.
191 | shift # remove old arg
192 | set -- "$@" "$arg" # push replacement arg
193 | done
194 | fi
195 |
196 | # Collect all arguments for the java command;
197 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
198 | # shell script including quotes and variable substitutions, so put them in
199 | # double quotes to make sure that they get re-expanded; and
200 | # * put everything else in single quotes, so that it's not re-expanded.
201 |
202 | set -- \
203 | "-Dorg.gradle.appname=$APP_BASE_NAME" \
204 | -classpath "$CLASSPATH" \
205 | org.gradle.wrapper.GradleWrapperMain \
206 | "$@"
207 |
208 | # Use "xargs" to parse quoted args.
209 | #
210 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed.
211 | #
212 | # In Bash we could simply go:
213 | #
214 | # readarray ARGS < <( xargs -n1 <<<"$var" ) &&
215 | # set -- "${ARGS[@]}" "$@"
216 | #
217 | # but POSIX shell has neither arrays nor command substitution, so instead we
218 | # post-process each arg (as a line of input to sed) to backslash-escape any
219 | # character that might be a shell metacharacter, then use eval to reverse
220 | # that process (while maintaining the separation between arguments), and wrap
221 | # the whole thing up as a single "set" statement.
222 | #
223 | # This will of course break if any of these variables contains a newline or
224 | # an unmatched quote.
225 | #
226 |
227 | eval "set -- $(
228 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
229 | xargs -n1 |
230 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
231 | tr '\n' ' '
232 | )" '"$@"'
233 |
234 | exec "$JAVACMD" "$@"
235 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/retrieval/LoaderClientConnection.java:
--------------------------------------------------------------------------------
1 | /**
2 | * ApacheHttpClient
3 | * Copyright 24.2.2018 by Michael Peter Christen, @0rb1t3r
4 | *
5 | * This library is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU Lesser General Public
7 | * License as published by the Free Software Foundation; either
8 | * version 2.1 of the License, or (at your option) any later version.
9 | *
10 | * This library is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | * Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program in the file lgpl21.txt
17 | * If not, see <http://www.gnu.org/licenses/>.
18 | */
19 |
20 | package net.yacy.grid.loader.retrieval;
21 |
22 | import java.io.BufferedInputStream;
23 | import java.io.ByteArrayOutputStream;
24 | import java.io.IOException;
25 | import java.io.InputStream;
26 | import java.net.SocketTimeoutException;
27 | import java.net.UnknownHostException;
28 | import java.nio.charset.StandardCharsets;
29 | import java.util.ArrayList;
30 | import java.util.HashMap;
31 | import java.util.List;
32 | import java.util.Map;
33 | import java.util.concurrent.Executors;
34 | import java.util.concurrent.ScheduledExecutorService;
35 | import java.util.concurrent.TimeUnit;
36 |
37 | import javax.net.ssl.SSLHandshakeException;
38 |
39 | import org.apache.http.Header;
40 | import org.apache.http.HttpEntity;
41 | import org.apache.http.HttpResponse;
42 | import org.apache.http.RequestLine;
43 | import org.apache.http.client.config.RequestConfig;
44 | import org.apache.http.client.methods.HttpGet;
45 | import org.apache.http.client.methods.HttpHead;
46 | import org.apache.http.client.methods.HttpRequestBase;
47 | import org.apache.http.conn.HttpHostConnectException;
48 | import org.apache.http.impl.client.CloseableHttpClient;
49 | import org.apache.http.impl.client.HttpClientBuilder;
50 | import org.apache.http.util.EntityUtils;
51 |
52 | import net.yacy.grid.http.ClientConnection;
53 | import net.yacy.grid.http.ClientIdentification;
54 | import net.yacy.grid.tools.Logger;
55 |
56 | public class LoaderClientConnection implements HttpClient {
57 |
58 | private static final String CRLF = new String(ClientConnection.CRLF, StandardCharsets.US_ASCII);
59 |
60 | public static String userAgent = ClientIdentification.browserAgent.userAgent;
61 | private static CloseableHttpClient httpClient = ClientConnection.getClosableHttpClient(userAgent);
62 | private static ScheduledExecutorService executorService = Executors.newScheduledThreadPool(30);
63 |
64 | static {
65 | RequestConfig config = RequestConfig.custom()
66 | .setConnectTimeout(10000)
67 | .setConnectionRequestTimeout(10000)
68 | .setSocketTimeout(10000).build();
69 | httpClient =
70 | HttpClientBuilder.create().setDefaultRequestConfig(config).build();
71 | }
72 |
73 | private int status_code;
74 | private String mime;
75 | private final Map> header;
76 | private final String requestHeader;
77 |
78 | private String responseHeader;
79 | private byte[] content;
80 |
81 | public LoaderClientConnection(final String url, final boolean head) throws IOException {
82 | this.status_code = -1;
83 | this.content = null;
84 | this.mime = "";
85 | this.header = new HashMap>();
86 |
87 | final HttpRequestBase request = head ? new HttpHead(url) : new HttpGet(url);
88 | request.setHeader("User-Agent", userAgent);
89 | request.setHeader("Accept", "text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2");
90 |
91 | // compute the request header (we do this to have a documentation later of what we did)
92 | final StringBuffer sb = new StringBuffer();
93 | final RequestLine status = request.getRequestLine();
94 | sb.append(status.toString()).append(CRLF);
95 | for (final Header h: request.getAllHeaders()) {
96 | sb.append(h.getName()).append(": ").append(h.getValue()).append(CRLF);
97 | }
98 | sb.append(CRLF);
99 | this.requestHeader = sb.toString();
100 |
101 | // do the request
102 | HttpResponse httpResponse = null;
103 | try {
104 | executorService.schedule(request::abort, (long)10, TimeUnit.SECONDS);
105 | httpResponse = httpClient.execute(request);
106 | } catch (final UnknownHostException e) {
107 | request.releaseConnection();
108 | throw new IOException("client connection failed: unknown host " + request.getURI().getHost());
109 | } catch (final SocketTimeoutException e) {
110 | request.releaseConnection();
111 | throw new IOException("client connection timeout for request: " + request.getURI());
112 | } catch (final SSLHandshakeException e) {
113 | request.releaseConnection();
114 | throw new IOException("client connection handshake error for domain " + request.getURI().getHost() + ": " + e.getMessage());
115 | } catch (final HttpHostConnectException e) {
116 | request.releaseConnection();
117 | throw new IOException("client connection refused for request " + request.getURI() + ": " + e.getMessage());
118 | } catch (final Throwable e) {
119 | request.releaseConnection();
120 | throw new IOException("error " + request.getURI() + ": " + e.getMessage());
121 | } finally {
122 | if (httpResponse != null) {
123 | this.status_code = httpResponse.getStatusLine().getStatusCode();
124 | final HttpEntity httpEntity = httpResponse.getEntity();
125 | if (head || this.status_code != 200) {
126 | EntityUtils.consumeQuietly(httpEntity);
127 | if (!head && this.status_code != 200) {
128 | request.releaseConnection();
129 | throw new IOException("client connection to " + url + " fail (status code " + this.status_code + "): " + httpResponse.getStatusLine().getReasonPhrase());
130 | }
131 | } else {
132 | try {
133 | final InputStream inputStream = new BufferedInputStream(httpEntity.getContent());
134 | final ByteArrayOutputStream r = new ByteArrayOutputStream();
135 | final byte[] b = new byte[1024];
136 | int c;
137 | while ((c = inputStream.read(b)) > 0) r.write(b, 0, c);
138 | this.content = r.toByteArray();
139 | } catch (final IOException e) {
140 | throw e;
141 | }
142 | Logger.info(this.getClass(), "ContentLoader loaded " + url);
143 | }
144 |
145 | // read response header and set mime
146 | if (this.status_code == 200 || this.status_code == 403) {
147 | for (final Header h: httpResponse.getAllHeaders()) {
148 | List vals = this.header.get(h.getName());
149 | if (vals == null) { vals = new ArrayList(); this.header.put(h.getName(), vals); }
150 | vals.add(h.getValue());
151 | if (h.getName().equals("Content-Type")) this.mime = h.getValue();
152 | }
153 | }
154 |
155 | // fix mime in case a font is assigned
156 | final int p = this.mime.indexOf(';');
157 | if (p >= 0) {
158 | String charset = p < this.mime.length() - 2 ? this.mime.substring(p + 2) : "";
159 | this.mime = this.mime.substring(0, p);
160 | if (charset.startsWith("; charset=")) charset = charset.substring(10);
161 | }
162 |
163 | // compute response header string
164 | sb.setLength(0);
165 | sb.append(status.getProtocolVersion()).append(' ').append(this.status_code).append(CRLF);
166 | for (final Map.Entry> headers: this.header.entrySet()) {
167 | for (final String v: headers.getValue()) {
168 | sb.append(headers.getKey()).append(": ").append(v).append(CRLF);
169 | }
170 | }
171 | sb.append(CRLF);
172 | this.responseHeader = sb.toString();
173 | }
174 | request.releaseConnection();
175 | }
176 | }
177 |
178 | @Override
179 | public int getStatusCode() {
180 | return this.status_code;
181 | }
182 |
183 | @Override
184 | public String getMime() {
185 | return this.mime;
186 | }
187 |
188 | @Override
189 | public Map> getHeader() {
190 | return this.header;
191 | }
192 |
193 | @Override
194 | public String getRequestHeader() {
195 | return this.requestHeader;
196 | }
197 |
198 | @Override
199 | public String getResponseHeader() {
200 | return this.responseHeader;
201 | }
202 |
203 | @Override
204 | public byte[] getContent() {
205 | return this.content;
206 | }
207 |
208 | public static void main(final String[] args) {
209 | try {
210 | //final LoaderClientConnection client = new LoaderClientConnection("https://yacy.net", false);
211 | final LoaderClientConnection client = new LoaderClientConnection("https://morrismuseum.org/", false);
212 |
213 | final int status = client.getStatusCode();
214 | System.out.println("status: " + status);
215 | //String requestHeaders = client.getRequestHeader().toString();
216 | //String responseHeaders = client.getResponseHeader().toString();
217 | System.out.println(new String(client.getContent()));
218 |
219 | } catch (final IOException e) {
220 | e.printStackTrace();
221 | }
222 | }
223 | }
224 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/retrieval/HtmlUnitLoader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * HtmlUnitLoader
3 | * Copyright 25.4.2017 by Michael Peter Christen, @0rb1t3r
4 | *
5 | * This library is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU Lesser General Public
7 | * License as published by the Free Software Foundation; either
8 | * version 2.1 of the License, or (at your option) any later version.
9 | *
10 | * This library is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | * Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program in the file lgpl21.txt
17 | * If not, see <http://www.gnu.org/licenses/>.
18 | */
19 |
20 | package net.yacy.grid.loader.retrieval;
21 |
22 | import java.io.IOException;
23 | import java.net.MalformedURLException;
24 | import java.net.URL;
25 | import java.util.List;
26 | import java.util.Map;
27 | import java.util.TimeZone;
28 |
29 | import com.gargoylesoftware.css.parser.CSSErrorHandler;
30 | import com.gargoylesoftware.css.parser.CSSException;
31 | import com.gargoylesoftware.css.parser.CSSParseException;
32 | import com.gargoylesoftware.htmlunit.BrowserVersion;
33 | import com.gargoylesoftware.htmlunit.HttpMethod;
34 | import com.gargoylesoftware.htmlunit.BrowserVersion.BrowserVersionBuilder;
35 | import com.gargoylesoftware.htmlunit.IncorrectnessListener;
36 | import com.gargoylesoftware.htmlunit.Page;
37 | import com.gargoylesoftware.htmlunit.ScriptException;
38 | import com.gargoylesoftware.htmlunit.TopLevelWindow;
39 | import com.gargoylesoftware.htmlunit.WebClient;
40 | import com.gargoylesoftware.htmlunit.WebClientOptions;
41 | import com.gargoylesoftware.htmlunit.WebRequest;
42 | import com.gargoylesoftware.htmlunit.WebWindow;
43 | import com.gargoylesoftware.htmlunit.html.HtmlPage;
44 | import com.gargoylesoftware.htmlunit.html.parser.HTMLParserListener;
45 | import com.gargoylesoftware.htmlunit.javascript.JavaScriptErrorListener;
46 | import com.gargoylesoftware.htmlunit.util.NameValuePair;
47 | import com.gargoylesoftware.htmlunit.util.UrlUtils;
48 |
49 | import net.yacy.grid.tools.Logger;
50 | import net.yacy.grid.tools.Memory;
51 |
52 | /**
53 |  * Loader which renders a web page with the headless browser HtmlUnit, see http://htmlunit.sourceforge.net/
54 |  * The page is loaded inside the constructor; afterwards the rendered document and the
55 |  * request and response headers are available from the getters.
56 |  */
57 | public class HtmlUnitLoader {
58 |
59 |     // http header lines are terminated with CRLF
60 |     private static final String CRLF = "\r\n";
61 |
62 |     /**
63 |      * Create a web client which sends the user agent of BrowserVersion.CHROME.
64 |      *
65 |      * @return a new web client; the caller is responsible for closing it
66 |      */
67 |     public static WebClient getClient() {
68 |         return getClient(BrowserVersion.CHROME.getUserAgent());
69 |     }
70 |
71 |     /**
72 |      * Create a web client with javascript enabled and with css, image download and geolocation
73 |      * disabled. The listeners which are attached here ignore all problems that are reported to them.
74 |      *
75 |      * @param userAgent the user agent which the client sends
76 |      * @return a new web client; the caller is responsible for closing it
77 |      */
78 |     public static WebClient getClient(String userAgent) {
79 |         WebClient webClient = new WebClient(getBrowser(userAgent));
80 |         WebClientOptions options = webClient.getOptions();
81 |         options.setJavaScriptEnabled(true);
82 |         options.setCssEnabled(false);
83 |         options.setPopupBlockerEnabled(true);
84 |         options.setRedirectEnabled(true);
85 |         options.setDownloadImages(false);
86 |         options.setGeolocationEnabled(false);
87 |         options.setPrintContentOnFailingStatusCode(false);
88 |         options.setThrowExceptionOnScriptError(false);
89 |         options.setMaxInMemory(0);
90 |         options.setHistoryPageCacheLimit(0);
91 |         options.setHistorySizeLimit(0);
92 |         //ProxyConfig proxyConfig = new ProxyConfig();
93 |         //proxyConfig.setProxyHost("127.0.0.1");
94 |         //proxyConfig.setProxyPort(Service.getPort());
95 |         //options.setProxyConfig(proxyConfig);
96 |         webClient.getCache().setMaxSize(10000); // this might be a bit large, is regulated with throttling and client cache clear in short memory status
97 |         webClient.setIncorrectnessListener(new IncorrectnessListener() {
98 |             @Override
99 |             public void notify(String arg0, Object arg1) {}
100 |         });
101 |         webClient.setCssErrorHandler(new CSSErrorHandler() {
102 |             @Override
103 |             public void warning(CSSParseException exception) throws CSSException {}
104 |             @Override
105 |             public void error(CSSParseException exception) throws CSSException {}
106 |             @Override
107 |             public void fatalError(CSSParseException exception) throws CSSException {}
108 |         });
109 |         webClient.setJavaScriptErrorListener(new JavaScriptErrorListener() {
110 |             @Override
111 |             public void timeoutError(HtmlPage arg0, long arg1, long arg2) {}
112 |             @Override
113 |             public void scriptException(HtmlPage arg0, ScriptException arg1) {}
114 |             @Override
115 |             public void malformedScriptURL(HtmlPage arg0, String arg1, MalformedURLException arg2) {}
116 |             @Override
117 |             public void loadScriptError(HtmlPage arg0, URL arg1, Exception arg2) {}
118 |             @Override
119 |             public void warn(String message, String sourceName, int line, String lineSource, int lineOffset) {}
120 |         });
121 |         webClient.setHTMLParserListener(new HTMLParserListener() {
122 |             @Override
123 |             public void error(String message, URL url, String html, int line, int column, String key) {}
124 |             @Override
125 |             public void warning(String message, URL url, String html, int line, int column, String key) {}
126 |         });
127 |         return webClient;
128 |     }
129 |
130 |     // browser version based on BrowserVersion.CHROME with the given user agent
131 |     private static BrowserVersion getBrowser(String userAgent) {
132 |         BrowserVersionBuilder browserBuilder = getBrowserBuilder();
133 |         browserBuilder.setUserAgent(userAgent);
134 |         return browserBuilder.build();
135 |     }
136 |
137 |     // builder for a copy of BrowserVersion.CHROME which uses the default time zone of this JVM
138 |     private static BrowserVersionBuilder getBrowserBuilder() {
139 |         BrowserVersionBuilder browserBuilder = new BrowserVersion.BrowserVersionBuilder(BrowserVersion.CHROME);
140 |         browserBuilder.setSystemTimezone(TimeZone.getDefault());
141 |         return browserBuilder;
142 |     }
143 |
144 |     private String url, xml, responseHeaders, requestHeaders;
145 |
146 |     /** @return the url which was given to the constructor */
147 |     public String getUrl() {
148 |         return this.url;
149 |     }
150 |
151 |     /** @return the loaded page as it is serialized by HtmlPage.asXml() */
152 |     public String getXml() {
153 |         return this.xml;
154 |     }
155 |
156 |     /** @return the status line and the header fields of the response, followed by an empty line */
157 |     public String getResponseHeaders() {
158 |         return this.responseHeaders;
159 |     }
160 |
161 |     /** @return the request line and the additional header fields of the request, followed by an empty line */
162 |     public String getRequestHeaders() {
163 |         return this.requestHeaders;
164 |     }
165 |
166 |     /**
167 |      * Compose the header block of a request. Every line is terminated with CRLF.
168 |      *
169 |      * @param httpMethod the method of the request
170 |      * @param url the requested url
171 |      * @param headers the header fields of the request
172 |      * @return the request line and one line for every header field, followed by an empty line
173 |      */
174 |     private static String parseRequestHeaders(HttpMethod httpMethod, String url, Map<String, String> headers) {
175 |         StringBuilder header = new StringBuilder();
176 |         header.append(httpMethod.toString()).append(' ').append(url).append(" HTTP/1.1").append(CRLF);
177 |         for (Map.Entry<String, String> entry : headers.entrySet()) {
178 |             header.append(entry.getKey()).append(": ").append(entry.getValue()).append(CRLF);
179 |         }
180 |         return header.append(CRLF).toString();
181 |     }
182 |
183 |     /**
184 |      * Compose the header block of a response. Every line is terminated with CRLF.
185 |      *
186 |      * @param statusCode the status code of the response
187 |      * @param headers the header fields of the response
188 |      * @return the status line and one line for every header field, followed by an empty line
189 |      */
190 |     private static String parseResponseHeaders(int statusCode, List<NameValuePair> headers) {
191 |         StringBuilder header = new StringBuilder();
192 |         header.append("HTTP/1.1 ").append(statusCode).append(CRLF);
193 |         for (NameValuePair nameValuePair : headers) {
194 |             header.append(nameValuePair.getName()).append(": ").append(nameValuePair.getValue()).append(CRLF);
195 |         }
196 |         return header.append(CRLF).toString();
197 |     }
198 |
199 |     /**
200 |      * Load the given url inside a new browser window and keep the rendered page and the headers.
201 |      *
202 |      * @param url the url of the page
203 |      * @param windowName the name of the browser window which is opened for the page
204 |      * @throws IOException if anything fails while the page is loaded or rendered, also if the url
205 |      *         does not deliver a html page. The original problem is attached as cause.
206 |      */
207 |     public HtmlUnitLoader(String url, String windowName) throws IOException {
208 |         this.url = url;
209 |         try (WebClient client = getClient()) {
210 |             long mem0 = Memory.available();
211 |             URL uurl = UrlUtils.toUrlUnsafe(url);
212 |             String htmlAcceptHeader = client.getBrowserVersion().getHtmlAcceptHeader();
213 |             WebWindow webWindow = client.openWindow(uurl, windowName); // throws ClassCastException: com.gargoylesoftware.htmlunit.UnexpectedPage cannot be cast to com.gargoylesoftware.htmlunit.html.HtmlPage
214 |             WebRequest webRequest = new WebRequest(uurl, htmlAcceptHeader, null);
215 |
216 |             // content which is not html arrives as another page type (e.g. XmlPage, UnexpectedPage)
217 |             Page loaded = client.getPage(webWindow, webRequest);
218 |             if (!(loaded instanceof HtmlPage)) {
219 |                 throw new IOException("not a html page: " + url + " was loaded as " + (loaded == null ? "null" : loaded.getClass().getName()));
220 |             }
221 |             HtmlPage page = (HtmlPage) loaded;
222 |             this.xml = page.asXml();
223 |
224 |             this.requestHeaders = parseRequestHeaders(
225 |                 webRequest.getHttpMethod(),
226 |                 url,
227 |                 webRequest.getAdditionalHeaders()
228 |             );
229 |
230 |             this.responseHeaders = parseResponseHeaders(
231 |                 page.getWebResponse().getStatusCode(),
232 |                 page.getWebResponse().getResponseHeaders()
233 |             );
234 |
235 |             long mem1 = Memory.available();
236 |             Page htmlpage = webWindow.getEnclosedPage();
237 |             htmlpage.cleanUp();
238 |             if (webWindow instanceof TopLevelWindow) ((TopLevelWindow) webWindow).close();
239 |             for (WebWindow ww: client.getWebWindows()) {
240 |                 if (ww instanceof TopLevelWindow) ((TopLevelWindow) ww).close();
241 |                 ww.getJobManager().removeAllJobs();
242 |             }
243 |             client.deregisterWebWindow(webWindow);
244 |             client.getCache().clear();
245 |             client.close();
246 |             long mem2 = Memory.available();
247 |             Logger.info(this.getClass(), "HtmlUnitLoader loaded " + url + " - " + this.xml.length() + " bytes; used " + (mem1 - mem0) + " bytes, after cleanup " + (mem2 - mem0) + " bytes");
248 |         } catch (Throwable e) {
249 |             // there can be many reasons here, e.g. an error in javascript
250 |             // we should always treat this as if the error is within the HTMLUnit, not the web page.
251 |             // Therefore, we should do a fail-over without HTMLUnit:
252 |             // we throw an IOException here and the caller must handle this
253 |             // by loading the page with the standard client
254 |             throw new IOException(e.getMessage(), e);
255 |         }
256 |     }
257 |
258 | }
259 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/retrieval/ContentLoader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * ContentLoader
3 | * Copyright 11.5.2017 by Michael Peter Christen, @0rb1t3r
4 | *
5 | * This library is free software; you can redistribute it and/or
6 | * modify it under the terms of the GNU Lesser General Public
7 | * License as published by the Free Software Foundation; either
8 | * version 2.1 of the License, or (at your option) any later version.
9 | *
10 | * This library is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | * Lesser General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU Lesser General Public License
16 | * along with this program in the file lgpl21.txt
17 | * If not, see <http://www.gnu.org/licenses/>.
18 | */
19 |
20 | package net.yacy.grid.loader.retrieval;
21 |
22 | import java.io.ByteArrayOutputStream;
23 | import java.io.File;
24 | import java.io.IOException;
25 | import java.io.OutputStream;
26 | import java.nio.charset.StandardCharsets;
27 | import java.text.SimpleDateFormat;
28 | import java.util.ArrayList;
29 | import java.util.Date;
30 | import java.util.HashMap;
31 | import java.util.LinkedHashMap;
32 | import java.util.List;
33 | import java.util.Locale;
34 | import java.util.Map;
35 | import java.util.concurrent.atomic.AtomicLong;
36 | import java.util.regex.Matcher;
37 | import java.util.regex.Pattern;
38 |
39 | import org.json.JSONArray;
40 | import org.jwat.warc.WarcWriter;
41 | import org.jwat.warc.WarcWriterFactory;
42 |
43 | import ai.susi.mind.SusiAction;
44 | import ai.susi.mind.SusiAction.RenderType;
45 | import net.yacy.grid.io.index.CrawlerDocument;
46 | import net.yacy.grid.io.index.CrawlerDocument.Status;
47 | import net.yacy.grid.loader.JwatWarcWriter;
48 | import net.yacy.grid.mcp.BrokerListener.ActionResult;
49 | import net.yacy.grid.mcp.Service;
50 | import net.yacy.grid.tools.Classification;
51 | import net.yacy.grid.tools.Digest;
52 | import net.yacy.grid.tools.Logger;
53 | import net.yacy.grid.tools.MultiProtocolURL;
54 |
55 | /**
56 |  * Loads the urls of a loader action and collects the requests and responses in a WARC
57 |  * which is held in memory.
58 |  */
59 | public class ContentLoader {
60 |
61 |     private final static Pattern charsetPattern = Pattern.compile("charset=([^\\s]+)");
62 |
63 |     private byte[] content;
64 |     private ActionResult result;
65 |
66 |     /**
67 |      * Load all urls which are listed in the attribute "urls" of the action and write them into a WARC.
68 |      * Nothing is loaded if the action is not a loader action; the result is FAIL_IRREVERSIBLE then.
69 |      *
70 |      * @param action the loader action which contains the urls
71 |      * @param data the data which is written as payload of the warcinfo record
72 |      * @param compressed true if the WARC shall be written compressed
73 |      * @param threadnameprefix prefix of the name which the current thread gets while it is loading
74 |      * @param id passed to the throttling check
75 |      * @param depth passed to the throttling check
76 |      * @param crawlingDepth passed to the throttling check
77 |      * @param loaderHeadless true if html pages shall be loaded with the headless browser
78 |      * @param priority passed to the throttling check
79 |      */
80 |     public ContentLoader(
81 |             final SusiAction action, final JSONArray data, final boolean compressed, final String threadnameprefix,
82 |             final String id, final int depth, final int crawlingDepth, final boolean loaderHeadless, final int priority) {
83 |         this.content = new byte[0];
84 |         this.result = ActionResult.FAIL_IRREVERSIBLE;
85 |
86 |         // this must have a loader action
87 |         if (action.getRenderType() != RenderType.loader) {
88 |             return;
89 |         }
90 |
91 |         // extract urls
92 |         final JSONArray urls = action.getArrayAttr("urls");
93 |         final List<String> urlss = new ArrayList<>();
94 |         urls.forEach(u -> urlss.add(((String) u)));
95 |         final byte[] warcPayload = data.toString(2).getBytes(StandardCharsets.UTF_8);
96 |
97 |         // start loading
98 |         Thread.currentThread().setName(threadnameprefix + " loading " + urlss.toString());
99 |
100 |         // construct a WARC
101 |         final ByteArrayOutputStream out = new ByteArrayOutputStream();
102 |         try {
103 |             final WarcWriter ww = ContentLoader.initWriter(out, warcPayload, compressed);
104 |             try {
105 |                 final Map<String, ActionResult> errors = ContentLoader.load(ww, urlss, threadnameprefix, id, depth, crawlingDepth, loaderHeadless, priority);
106 |                 this.result = ActionResult.SUCCESS;
107 |                 errors.forEach((u, c) -> {
108 |                     Logger.debug(this.getClass(), "Loader - cannot load: " + u + " - " + c);
109 |                     if (c == ActionResult.FAIL_RETRY && this.result == ActionResult.SUCCESS) this.result = ActionResult.FAIL_RETRY;
110 |                     if (c == ActionResult.FAIL_IRREVERSIBLE) this.result = ActionResult.FAIL_IRREVERSIBLE;
111 |                 });
112 |             } finally {
113 |                 // the writer must be closed before the bytes are taken, otherwise the WARC may be incomplete
114 |                 ww.close();
115 |             }
116 |         } catch (final IOException e) {
117 |             this.result = ActionResult.FAIL_IRREVERSIBLE;
118 |             Logger.warn(this.getClass(), "ContentLoader WARC writer init problem", e);
119 |         }
120 |
121 |         // whatever was written is delivered; the result tells the caller whether loading was successful
122 |         this.content = out.toByteArray();
123 |     }
124 |
125 |     /** @return the WARC which was written while loading; empty if the action was not a loader action */
126 |     public byte[] getContent() {
127 |         return this.content;
128 |     }
129 |
130 |     /** @return SUCCESS if all urls were processed without an error, otherwise the most severe failure */
131 |     public ActionResult getResult() {
132 |         return this.result;
133 |     }
134 |
135 |     private final static SimpleDateFormat millisFormat = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US);
136 |     private final static AtomicLong createTempFileCounter = new AtomicLong(0);
137 |
138 |     /**
139 |      * Create a temporary file; the name contains the prefix, the current time and a running number.
140 |      *
141 |      * @param prefix the first part of the file name
142 |      * @param suffix the ending of the file name
143 |      * @return the new file
144 |      * @throws IOException if the file cannot be created
145 |      */
146 |     public static File createTempFile(final String prefix, final String suffix) throws IOException {
147 |         final String timestamp;
148 |         // SimpleDateFormat is not thread-safe: the shared instance must be used by one thread at a time
149 |         synchronized (millisFormat) {
150 |             timestamp = millisFormat.format(new Date());
151 |         }
152 |         final String tmpprefix = prefix + "-" + timestamp + Long.toString(createTempFileCounter.getAndIncrement());
153 |         return File.createTempFile(tmpprefix, suffix);
154 |     }
155 |
156 |     /**
157 |      * Create a WARC writer for the given stream and write the warcinfo record.
158 |      *
159 |      * @param out the stream which receives the WARC
160 |      * @param payload the payload of the warcinfo record
161 |      * @param compressed true if the WARC shall be written compressed
162 |      * @return the writer, ready for the request and response records
163 |      * @throws IOException if the writer cannot be created or the warcinfo record cannot be written
164 |      */
165 |     private static WarcWriter initWriter(final OutputStream out, final byte[] payload, final boolean compressed) throws IOException {
166 |         final WarcWriter ww = WarcWriterFactory.getWriter(out, compressed);
167 |         JwatWarcWriter.writeWarcinfo(ww, new Date(), null, null, payload);
168 |         return ww;
169 |     }
170 |
171 |     /**
172 |      * Load the given urls one after the other and write the loaded content into the WARC.
173 |      * The outcome is also noted in the crawler documents of the urls, which are stored in bulk
174 |      * after all urls have been processed.
175 |      *
176 |      * @param warcWriter the writer which receives the request and response records
177 |      * @param urls the urls to be loaded
178 |      * @param threadName prefix of the name which the current thread gets while it is loading
179 |      * @param id passed to the throttling check
180 |      * @param depth passed to the throttling check
181 |      * @param crawlingDepth passed to the throttling check
182 |      * @param loaderHeadless true if html pages shall be loaded with the headless browser
183 |      * @param priority passed to the throttling check
184 |      * @return the urls which caused an unexpected error, mapped to the kind of failure
185 |      * @throws IOException NOTE(review): presumably if the crawler documents cannot be loaded - confirm
186 |      */
187 |     private static Map<String, ActionResult> load(
188 |             final WarcWriter warcWriter, final List<String> urls, final String threadName,
189 |             final String id, final int depth, final int crawlingDepth, final boolean loaderHeadless, final int priority) throws IOException {
190 |
191 |         // this is here for historical reasons, we actually should have all urls normalized
192 |         final List<String> fixedURLs = new ArrayList<>();
193 |         urls.forEach(url -> fixedURLs.add(url.indexOf("//") < 0 ? "http://" + url : url));
194 |
195 |         // prepare map with ids and load crawlerDocuments
196 |         final Map<String, String> urlmap = new HashMap<>();
197 |         fixedURLs.forEach(url -> urlmap.put(url, Digest.encodeMD5Hex(url)));
198 |         final Map<String, CrawlerDocument> crawlerDocuments = CrawlerDocument.loadBulk(Service.instance.config, Service.instance.config.gridIndex, urlmap.values());
199 |
200 |         // load content
201 |         final Map<String, ActionResult> errors = new LinkedHashMap<>();
202 |         fixedURLs.forEach(url -> {
203 |
204 |             // do loader throttling here; if the check fails we wait the default time
205 |             long throttling = 250;
206 |             try {
207 |                 throttling = Service.instance.config.gridControl.checkThrottling(id, url, depth, crawlingDepth, loaderHeadless, priority);
208 |             } catch (final IOException e1) {}
209 |             Thread.currentThread().setName(threadName + " loading " + url.toString() + ", throttling = " + throttling);
210 |             try {Thread.sleep(throttling);} catch (final InterruptedException e) {}
211 |
212 |             // start loading
213 |             try {
214 |                 // load entry from crawler index
215 |                 final String urlid = urlmap.get(url);
216 |                 final CrawlerDocument crawlerDocument = crawlerDocuments.get(urlid);
217 |                 //assert crawlerDocument != null;
218 |
219 |                 // load content from the network
220 |                 final long t = System.currentTimeMillis();
221 |                 try {
222 |                     boolean success = false;
223 |                     if (url.startsWith("http")) success = loadHTTP(warcWriter, url, threadName, loaderHeadless);
224 |                     else if (url.startsWith("ftp")) loadFTP(warcWriter, url);
225 |                     else if (url.startsWith("smb")) loadSMB(warcWriter, url);
226 |
227 |                     // write success status
228 |                     if (success && crawlerDocument != null) {
229 |                         final long load_time = System.currentTimeMillis() - t;
230 |                         crawlerDocument.setStatus(Status.loaded).setStatusDate(new Date()).setComment("load time: " + load_time + " milliseconds");
231 |                         // crawlerDocument.store(Data.gridIndex); we bulk-store this later
232 |                         // check with http://localhost:9200/crawler/_search?q=status_s:loaded
233 |                     }
234 |                 } catch (final IOException e) {
235 |                     // write fail status
236 |                     if (crawlerDocument != null) {
237 |                         final long load_time = System.currentTimeMillis() - t;
238 |                         crawlerDocument.setStatus(Status.load_failed).setStatusDate(new Date()).setComment("load fail: '" + e.getMessage() + "' after " + load_time + " milliseconds");
239 |                         // crawlerDocument.store(Data.gridIndex); we bulk-store this later
240 |                         // check with http://localhost:9200/crawler/_search?q=status_s:load_failed
241 |                     }
242 |                 }
243 |             } catch (final Throwable e) {
244 |                 Logger.warn("ContentLoader cannot load " + url + " - " + e.getMessage());
245 |                 errors.put(url, ActionResult.FAIL_IRREVERSIBLE);
246 |             }
247 |         });
248 |
249 |         // bulk-store the crawler documents
250 |         try {
251 |             CrawlerDocument.storeBulk(Service.instance.config, Service.instance.config.gridIndex, crawlerDocuments);
252 |         } catch (final Throwable e) {
253 |             Logger.error(e);
254 |         }
255 |         return errors;
256 |     }
257 |
258 |     // loading with the ftp protocol is not implemented, nothing is written to the WARC
259 |     private static void loadFTP(final WarcWriter warcWriter, final String url) throws IOException {
260 |
261 |     }
262 |
263 |     // loading with the smb protocol is not implemented, nothing is written to the WARC
264 |     private static void loadSMB(final WarcWriter warcWriter, final String url) throws IOException {
265 |
266 |     }
267 |
268 |     /**
269 |      * Load a http or https url and write a request and a response record into the WARC.
270 |      * If the headless loader is requested and the url points to html, the page is loaded with
271 |      * HtmlUnit; if that is not the case or HtmlUnit fails, the url is loaded with a plain GET request.
272 |      *
273 |      * @param warcWriter the writer which receives the records
274 |      * @param url the url to be loaded
275 |      * @param threadName used as window name of the headless browser
276 |      * @param useHeadlessLoader true if html pages shall be loaded with the headless browser
277 |      * @return true if content was loaded and written; false if the status code is not 200 or no content was received
278 |      * @throws IOException if the url cannot be loaded
279 |      */
280 |     private static boolean loadHTTP(final WarcWriter warcWriter, final String url, final String threadName, final boolean useHeadlessLoader) throws IOException {
281 |         final Date loaddate = new Date();
282 |         byte[] content = null;
283 |         String requestHeaders = null;
284 |         String responseHeaders = null;
285 |         final MultiProtocolURL u = new MultiProtocolURL(url);
286 |
287 |         if (useHeadlessLoader) {
288 |             // using the headless loader only makes sense in certain situations:
289 |             // we must make sure that the content is actually html, otherwise there is
290 |             // no point in usage of the headless loader and we would fall back to normal loading.
291 |             String ext = MultiProtocolURL.getFileExtension(u.getFileName());
292 |             boolean isHtml = Classification.isHtmlExtension(ext);
293 |
294 |             // not all content that is actually html has a html extension, we also check the mime type by using a head request
295 |             if (!isHtml) {
296 |                 LoaderClientConnection ac = new LoaderClientConnection(url, true);
297 |                 String mime = ac.getMime();
298 |                 isHtml = mime.endsWith("/html") || mime.endsWith("/xhtml+xml");
299 |             }
300 |
301 |             // finally we use the headless loader to get the content
302 |             if (isHtml) {
303 |                 try {
304 |                     // use htmlunit to load this
305 |                     final HtmlUnitLoader htmlUnitLoader = new HtmlUnitLoader(url, threadName);
306 |                     final String xml = htmlUnitLoader.getXml();
307 |
308 |                     requestHeaders = htmlUnitLoader.getRequestHeaders();
309 |                     responseHeaders = htmlUnitLoader.getResponseHeaders();
310 |
311 |                     // we consider that the resulting charset should be UTF_8
312 |                     content = xml.getBytes(StandardCharsets.UTF_8);
313 |
314 |                     // However, the original Content-Type may denote a different charset
315 |                     // Therefore we must patch that charset now in the response header
316 |                     Matcher matcher = charsetPattern.matcher(responseHeaders);
317 |                     if (matcher.find()) {
318 |                         String oldCharset = matcher.group(1);
319 |                         String newCharset = StandardCharsets.UTF_8.name();
320 |                         if (!oldCharset.equals(newCharset)) {
321 |                             StringBuffer sb = new StringBuffer();
322 |                             matcher.appendReplacement(sb, "charset=" + newCharset);
323 |                             matcher.appendTail(sb);
324 |                             responseHeaders = sb.toString();
325 |                         }
326 |                     }
327 |                 } catch (final Throwable e) {
328 |                     // a missing page is a final failure, every other problem is retried with the normal loader below
329 |                     final String cause = e.getMessage();
330 |                     if (cause != null && cause.indexOf("404") >= 0) {
331 |                         throw new IOException("" + url + " fail: " + cause, e);
332 |                     }
333 |                     Logger.debug("Loader - HtmlUnit failed (will retry): " + cause);
334 |                 }
335 |             }
336 |         }
337 |
338 |         // Here we may not have loaded the content because of not-required headless loading or
339 |         // because headless loading has failed. Do a normal loading:
340 |         if (content == null) {
341 |             // do another http request. This can either happen because mime type is not html
342 |             // or it was html and HtmlUnit has failed - we retry the normal way here.
343 |
344 |             LoaderClientConnection ac = new LoaderClientConnection(url, false);
345 |             final int status = ac.getStatusCode();
346 |             if (status != 200) return false;
347 |
348 |             requestHeaders = ac.getRequestHeader();
349 |             responseHeaders = ac.getResponseHeader();
350 |
351 |             content = ac.getContent();
352 |         }
353 |
354 |         if (content == null || content.length == 0) return false;
355 |
356 |         JwatWarcWriter.writeRequest(warcWriter, url, null, loaddate, null, null, requestHeaders.getBytes(StandardCharsets.UTF_8));
357 |
358 |         // add the response header before the content
359 |         final ByteArrayOutputStream r = new ByteArrayOutputStream();
360 |         r.write(responseHeaders.getBytes(StandardCharsets.UTF_8));
361 |         r.write(content);
362 |         content = r.toByteArray();
363 |
364 |         Logger.info("ContentLoader writing WARC for " + url + " - " + content.length + " bytes");
365 |         JwatWarcWriter.writeResponse(warcWriter, url, null, loaddate, null, null, content);
366 |
367 |         return true;
368 |     }
369 |
370 |     // load the url into a new uncompressed WARC and return it as string; used by main for testing
371 |     private static String getTestWarcContent(String url, boolean loaderHeadless) {
372 |         final byte[] warcPayload = "test".getBytes(StandardCharsets.UTF_8);
373 |         ByteArrayOutputStream out = new ByteArrayOutputStream();
374 |         try {
375 |             WarcWriter warcWriter = ContentLoader.initWriter(out, warcPayload, false);
376 |             loadHTTP(warcWriter, url, "test", loaderHeadless);
377 |             warcWriter.close();
378 |             out.close();
379 |             String b = new String(out.toByteArray(), StandardCharsets.UTF_8);
380 |             return b;
381 |         } catch (IOException e) {
382 |             e.printStackTrace();
383 |         }
384 |         return "";
385 |     }
386 |
387 |     public static void main(String[] args) {
388 |         String url = "https://www.schulministerium.nrw.de/BiPo/SchuleAendern/msbleikaleistungen.html?katalogId=99088003034004";
389 |
390 |         String headless = getTestWarcContent(url, true);
391 |         String normal = getTestWarcContent(url, false);
392 |         System.out.println("headless:\n" + headless);
393 |         System.out.println("\nnormal:\n" + normal);
394 |         //System.out.println("Difference: " + StringUtils.difference(headless, normal)); // requires import org.apache.commons.lang3.StringUtils;
395 |         System.exit(0);
396 |     }
397 |
398 | }
399 |
--------------------------------------------------------------------------------
/src/main/java/net/yacy/grid/loader/retrieval/FTPClient.java:
--------------------------------------------------------------------------------
1 | /**
2 | * FTPClient
3 | * Copyright 2002, 2004, 2006, 2010 by Michael Peter Christen
4 | * first published on http://yacy.net
5 | * main implementation finished: 28.05.2002
6 | * last major change: 06.05.2004
7 | * added html generation for directories: 5.9.2006
8 | * migrated to the cora package and re-licensed under lgpl: 23.08.2010
9 | *
10 | * This file is part of YaCy Content Integration
11 | *
12 | * This library is free software; you can redistribute it and/or
13 | * modify it under the terms of the GNU Lesser General Public
14 | * License as published by the Free Software Foundation; either
15 | * version 2.1 of the License, or (at your option) any later version.
16 | *
17 | * This library is distributed in the hope that it will be useful,
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | * Lesser General Public License for more details.
21 | *
22 | * You should have received a copy of the GNU Lesser General Public License
23 | * along with this program in the file lgpl21.txt
24 | * If not, see <http://www.gnu.org/licenses/>.
25 | */
26 |
27 | package net.yacy.grid.loader.retrieval;
28 |
29 | import java.io.BufferedOutputStream;
30 | import java.io.BufferedReader;
31 | import java.io.ByteArrayOutputStream;
32 | import java.io.DataInputStream;
33 | import java.io.DataOutputStream;
34 | import java.io.File;
35 | import java.io.FileInputStream;
36 | import java.io.FileNotFoundException;
37 | import java.io.FileOutputStream;
38 | import java.io.IOException;
39 | import java.io.InputStream;
40 | import java.io.InputStreamReader;
41 | import java.io.OutputStream;
42 | import java.io.PrintStream;
43 | import java.io.RandomAccessFile;
44 | import java.lang.reflect.Array;
45 | import java.lang.reflect.InvocationTargetException;
46 | import java.lang.reflect.Method;
47 | import java.net.InetAddress;
48 | import java.net.InetSocketAddress;
49 | import java.net.ServerSocket;
50 | import java.net.Socket;
51 | import java.net.SocketException;
52 | import java.nio.charset.StandardCharsets;
53 | import java.text.DateFormat;
54 | import java.text.ParseException;
55 | import java.text.SimpleDateFormat;
56 | import java.util.ArrayList;
57 | import java.util.Calendar;
58 | import java.util.Date;
59 | import java.util.HashMap;
60 | import java.util.List;
61 | import java.util.Locale;
62 | import java.util.Map;
63 | import java.util.Properties;
64 | import java.util.StringTokenizer;
65 | import java.util.concurrent.BlockingQueue;
66 | import java.util.concurrent.LinkedBlockingQueue;
67 | import java.util.regex.Matcher;
68 | import java.util.regex.Pattern;
69 |
70 | import net.yacy.grid.tools.Domains;
71 | import net.yacy.grid.tools.Logger;
72 |
73 | public class FTPClient {
74 |
public static final String ANONYMOUS = "anonymous";

private static final String vDATE = "20161222";

// glob = false -> filenames are taken literally for mget, ..
private boolean glob = true;

// transfer type
private static final char transferType = 'i'; // transfer binary

// block size [1K by default]
private static final int blockSize = 1024;

// client socket for commands
private Socket ControlSocket = null;

// socket timeout
private static final int ControlSocketTimeout = 10000;

// data socket timeout
private int DataSocketTimeout = 0; // in seconds (default infinite)

// socket for data transactions
private ServerSocket DataSocketActive = null;
private Socket DataSocketPassive = null;
private boolean DataSocketPassiveMode = true;

// output and input streams for client control connection
private BufferedReader clientInput = null;
private DataOutputStream clientOutput = null;

// client prompt
private String prompt = "ftp [local]>";

// arguments of the command currently being executed; cmd[0] is the command name (set by exec)
String[] cmd;

// session parameters
File currentLocalPath;
String account, password, host, remotemessage, remotegreeting, remotesystem;
int port;

// entry info cache: remote path -> entry info parsed for that path (filled by fileInfo)
private final Map<String, entryInfo> infoCache = new HashMap<String, entryInfo>();

// date-format in LIST (english month names); SimpleDateFormat is not thread-safe,
// so users of this shared instance must synchronize on it
private static final SimpleDateFormat lsDateFormat = new SimpleDateFormat("MMM d y H:m", new Locale("en"));

// TODO: implement RFC 2640 Internationalization
123 |
124 | public FTPClient() {
125 |
126 | this.currentLocalPath = new File(System.getProperty("user.dir"));
127 | try {
128 | this.currentLocalPath = new File(this.currentLocalPath.getCanonicalPath());
129 | } catch (final IOException e) {
130 | }
131 |
132 | this.account = null;
133 | this.password = null;
134 | this.host = null;
135 | this.port = -1;
136 | this.remotemessage = null;
137 | this.remotegreeting = null;
138 | this.remotesystem = null;
139 | }
140 |
141 | public boolean exec(String command, final boolean promptIt) {
142 | if ((command == null) || (command.isEmpty())) {
143 | return true;
144 | }
145 | int pos;
146 | String com;
147 | boolean ret = true;
148 | while (command.length() > 0) {
149 | pos = command.indexOf(';',0);
150 | if (pos < 0) {
151 | pos = command.indexOf("\n",0);
152 | }
153 | if (pos < 0) {
154 | com = command;
155 | command = "";
156 | } else {
157 | com = command.substring(0, pos);
158 | command = command.substring(pos + 1);
159 | }
160 | if (promptIt) {
161 | Logger.info(this.prompt + com);
162 | }
163 | this.cmd = line2args(com);
164 | try {
165 | ret = (((Boolean) getClass().getMethod(this.cmd[0].toUpperCase(), (Class>[]) Array.newInstance(Class.class, 0)).invoke(this, (Object[]) Array.newInstance(Object.class, 0)))
166 | .booleanValue());
167 | } catch (final InvocationTargetException e) {
168 | if (e.getMessage() != null) {
169 | if (notConnected()) {
170 | // the error was probably caused because there is no
171 | // connection
172 | Logger.warn("not connected. no effect.", e);
173 | } else {
174 | Logger.warn("ftp internal exception: target exception " + e);
175 | }
176 | return ret;
177 | }
178 | } catch (final IllegalAccessException e) {
179 | Logger.warn("ftp internal exception: wrong access " + e);
180 | return ret;
181 | } catch (final NoSuchMethodException e) {
182 | // consider first that the user attempted to execute a java
183 | // command from
184 | // the current path; either local or remote
185 | if (notConnected()) {
186 | // try a local exec
187 | try {
188 | javaexec(this.cmd);
189 | } catch (final Exception ee) {
190 | Logger.warn("Command '" + this.cmd[0] + "' not supported. Try 'HELP'.");
191 | }
192 | } else {
193 | // try a remote exec
194 | exec("java " + com, false);
195 | }
196 | return ret;
197 | }
198 | }
199 | return ret;
200 | }
201 |
202 | private String[] line2args(final String line) {
203 | // parse the command line
204 | if ((line == null) || (line.isEmpty())) {
205 | return null;
206 | }
207 | // pre-parse
208 | String line1 = "";
209 | boolean quoted = false;
210 | for (int i = 0; i < line.length(); i++) {
211 | if (quoted) {
212 | if (line.charAt(i) == '"') {
213 | quoted = false;
214 | } else {
215 | line1 = line1 + line.charAt(i);
216 | }
217 | } else {
218 | if (line.charAt(i) == '"') {
219 | quoted = true;
220 | } else if (line.charAt(i) == ' ') {
221 | line1 = line1 + '|';
222 | } else {
223 | line1 = line1 + line.charAt(i);
224 | }
225 | }
226 | }
227 | return line1.split("\\|");
228 | }
229 |
230 | static class cl extends ClassLoader {
231 |
232 | public cl() {
233 | super();
234 | }
235 |
236 | @Override
237 | public synchronized Class> loadClass(final String classname, final boolean resolve) throws ClassNotFoundException {
238 | Class> c = findLoadedClass(classname);
239 | if (c == null) {
240 | try {
241 | // second try: ask the system
242 | c = findSystemClass(classname);
243 | } catch (final ClassNotFoundException e) {
244 | // third try: load myself
245 | final File f = new File(System.getProperty("user.dir"), classname + ".class");
246 | final int length = (int) f.length();
247 | final byte[] classbytes = new byte[length];
248 | DataInputStream in = null;
249 | try {
250 | in = new DataInputStream(new FileInputStream(f));
251 | in.readFully(classbytes);
252 | c = defineClass(classname, classbytes, 0, classbytes.length);
253 | } catch (final FileNotFoundException ee) {
254 | throw new ClassNotFoundException();
255 | } catch (final IOException ee) {
256 | throw new ClassNotFoundException();
257 | } finally {
258 | try {
259 | in.close();
260 | } catch (final IOException ioe) {
261 | Logger.warn("Could not close input stream on file " + f);
262 | }
263 | }
264 | }
265 | }
266 | if (resolve) {
267 | resolveClass(c);
268 | }
269 | return c;
270 | }
271 |
272 | }
273 |
274 | private void javaexec(final String[] inArgs) {
275 | final String obj = inArgs[0];
276 | final String[] args = new String[inArgs.length - 1];
277 |
278 | // remove the object name from the array of arguments
279 | System.arraycopy(inArgs, 1, args, 0, inArgs.length - 1);
280 |
281 | // Build the argument list for invoke() method.
282 | final Object[] argList = new Object[1];
283 | argList[0] = args;
284 |
285 | final Properties pr = System.getProperties();
286 | final String origPath = (String) pr.get("java.class.path");
287 | try {
288 |
289 | // set the user.dir to the actual local path
290 | pr.put("user.dir", this.currentLocalPath.toString());
291 |
292 | // add the current path to the classpath
293 | // pr.put("java.class.path", "" + pr.get("user.dir") +
294 | // pr.get("path.separator") + origPath);
295 |
296 | // Logger.warning("System Properties: " + pr.toString());
297 |
298 | System.setProperties(pr);
299 |
300 | // locate object
301 | final Class> c = (new cl()).loadClass(obj);
302 | // Class c = this.getClass().getClassLoader().loadClass(obj);
303 |
304 | // locate public static main(String[]) method
305 | final Class>[] parameterType = (Class>[]) Array.newInstance(Class.class, 1);
306 | parameterType[0] = Class.forName("[Ljava.lang.String;");
307 | Method m = c.getMethod("main", parameterType);
308 |
309 | // invoke object.main()
310 | final Object result = m.invoke(null, argList);
311 | //parameterType = null;
312 | m = null;
313 |
314 | // handle result
315 | if (result != null) {
316 | Logger.info("returns " + result);
317 | }
318 |
319 | // set the local path to the user.dir (which may have changed)
320 | this.currentLocalPath = new File((String) pr.get("user.dir"));
321 |
322 | } catch (final ClassNotFoundException e) {
323 | // Logger.warning("cannot find class file " + obj +
324 | // ".class");
325 | // class file does not exist, go silently over it to not show
326 | // everybody that the
327 | // system attempted to load a class file
328 | Logger.warn("Command '" + obj + "' not supported. Try 'HELP'.");
329 | } catch (final NoSuchMethodException e) {
330 | Logger.warn("no \"public static main(String args[])\" in " + obj);
331 | } catch (final InvocationTargetException e) {
332 | final Throwable orig = e.getTargetException();
333 | if (orig.getMessage() != null) {
334 | Logger.warn("Exception from " + obj + ": " + orig.getMessage(), orig);
335 | }
336 | } catch (final IllegalAccessException e) {
337 | Logger.warn("Illegal access for " + obj + ": class is probably not declared as public", e);
338 | } catch (final NullPointerException e) {
339 | Logger.warn("main(String args[]) is not defined as static for " + obj);
340 | /*
341 | * } catch (final IOException e) { // class file does not exist, go
342 | * silently over it to not show everybody that the // system
343 | * attempted to load a class file Logger.warning("Command '" + obj + "'
344 | * not supported. Try 'HELP'.");
345 | */
346 | } catch (final Exception e) {
347 | Logger.warn("Exception caught: ", e);
348 | }
349 |
350 | // set the classpath to its original definition
351 | pr.put("java.class.path", origPath);
352 |
353 | }
354 |
355 | // FTP CLIENT COMMANDS ------------------------------------
356 |
357 | public boolean ASCII() {
358 | if (this.cmd.length != 1) {
359 | Logger.warn("Syntax: ASCII (no parameter)");
360 | return true;
361 | }
362 | try {
363 | literal("TYPE A");
364 | } catch (final IOException e) {
365 | Logger.warn("Error: ASCII transfer type not supported by server.");
366 | }
367 | return true;
368 | }
369 |
370 | public boolean BINARY() {
371 | if (this.cmd.length != 1) {
372 | Logger.warn("Syntax: BINARY (no parameter)");
373 | return true;
374 | }
375 | try {
376 | literal("TYPE I");
377 | } catch (final IOException e) {
378 | Logger.warn("Error: BINARY transfer type not supported by server.");
379 | }
380 | return true;
381 | }
382 |
383 | public boolean BYE() {
384 | return QUIT();
385 | }
386 |
387 | public boolean CD() {
388 | if (this.cmd.length != 2) {
389 | Logger.warn("Syntax: CD ");
390 | return true;
391 | }
392 | if (notConnected()) {
393 | return LCD();
394 | }
395 | try {
396 | // send cwd command
397 | send("CWD " + this.cmd[1]);
398 |
399 | final String reply = receive();
400 | if (isNotPositiveCompletion(reply)) {
401 | throw new IOException(reply);
402 | }
403 | } catch (final IOException e) {
404 | Logger.warn("Error: change of working directory to path " + this.cmd[1] + " failed.");
405 | }
406 | return true;
407 | }
408 |
409 | public boolean CLOSE() {
410 | return DISCONNECT();
411 | }
412 |
413 | private void rmForced(final String path) throws IOException {
414 | // first try: send DELE command (to delete a file)
415 | send("DELE " + path);
416 | // read reply
417 | final String reply1 = receive();
418 | if (isNotPositiveCompletion(reply1)) {
419 | // second try: send a RMD command (to delete a directory)
420 | send("RMD " + path);
421 | // read reply
422 | final String reply2 = receive();
423 | if (isNotPositiveCompletion(reply2)) {
424 | // third try: test if this thing is a directory or file and send
425 | // appropriate error message
426 | if (isFolder(path)) {
427 | throw new IOException(reply2);
428 | }
429 | throw new IOException(reply1);
430 | }
431 | }
432 | }
433 |
434 | /**
435 | * @param path
436 | * @return date of entry on ftp-server or now if date can not be obtained
437 | */
438 | public Date entryDate(final String path) {
439 | final entryInfo info = fileInfo(path);
440 | Date date = null;
441 | if (info != null) {
442 | date = info.date;
443 | }
444 | return date;
445 | }
446 |
447 | public boolean DEL() {
448 | if (this.cmd.length != 2) {
449 | Logger.warn("Syntax: DEL ");
450 | return true;
451 | }
452 | if (notConnected()) {
453 | return LDEL();
454 | }
455 | try {
456 | rmForced(this.cmd[1]);
457 | } catch (final IOException e) {
458 | Logger.warn("Error: deletion of file " + this.cmd[1] + " failed.");
459 | }
460 | return true;
461 | }
462 |
463 | public boolean RM() {
464 | return DEL();
465 | }
466 |
467 | public boolean DIR() {
468 | if (this.cmd.length > 2) {
469 | Logger.warn("Syntax: DIR [|]");
470 | return true;
471 | }
472 | if (notConnected()) {
473 | return LDIR();
474 | }
475 | try {
476 | List l;
477 | if (this.cmd.length == 2) {
478 | l = list(this.cmd[1], false);
479 | } else {
480 | l = list(".", false);
481 | }
482 | printElements(l);
483 | } catch (final IOException e) {
484 | Logger.warn("Error: remote list not available (1): " + e.getMessage());
485 | }
486 | return true;
487 | }
488 |
489 | public boolean DISCONNECT() {
490 | try {
491 | quit();
492 | Logger.info("---- Connection closed.");
493 | } catch (final IOException e) {
494 | // Connection to server lost
495 | // do not append any error to errPrintln because we can silently go over this error
496 | // otherwise the client treats this case as an error and does not accept the result of the session
497 | }
498 | try {
499 | closeConnection();
500 | } catch (final IOException e) {
501 | this.ControlSocket = null;
502 | this.DataSocketActive = null;
503 | this.DataSocketPassive = null;
504 | this.clientInput = null;
505 | this.clientOutput = null;
506 | }
507 | this.prompt = "ftp [local]>";
508 | return true;
509 | }
510 |
511 | private String quit() throws IOException {
512 |
513 | send("QUIT");
514 |
515 | // read status reply
516 | final String reply = receive();
517 | if (isNotPositiveCompletion(reply)) {
518 | throw new IOException(reply);
519 | }
520 |
521 | closeConnection();
522 |
523 | return reply;
524 | }
525 |
526 | public boolean EXIT() {
527 | return QUIT();
528 | }
529 |
530 | public boolean GET() {
531 | if ((this.cmd.length < 2) || (this.cmd.length > 3)) {
532 | Logger.warn("Syntax: GET []");
533 | return true;
534 | }
535 | final String remote = this.cmd[1]; // (new File(cmd[1])).getName();
536 | final boolean withoutLocalFile = this.cmd.length == 2;
537 |
538 | final String localFilename = (withoutLocalFile) ? remote : this.cmd[2];
539 | final File local = absoluteLocalFile(localFilename);
540 |
541 | if (local.exists()) {
542 | Logger.warn("Error: local file " + local.toString() + " already exists.\n" + " File " + remote
543 | + " not retrieved. Local file unchanged.");
544 | } else {
545 | if (withoutLocalFile) {
546 | retrieveFilesRecursively(remote, false);
547 | } else {
548 | try {
549 | get(local.getAbsolutePath(), remote);
550 | } catch (final IOException e) {
551 | Logger.warn("Error: retrieving file " + remote + " failed. (" + e.getMessage() + ")");
552 | }
553 | }
554 | }
555 | return true;
556 | }
557 |
558 | /**
559 | * @param localFilename
560 | * @return
561 | */
562 | private File absoluteLocalFile(final String localFilename) {
563 | File local;
564 | final File l = new File(localFilename);
565 | if (l.isAbsolute()) {
566 | local = l;
567 | } else {
568 | local = new File(this.currentLocalPath, localFilename);
569 | }
570 | return local;
571 | }
572 |
573 | private void retrieveFilesRecursively(final String remote, final boolean delete) {
574 | final File local = absoluteLocalFile(remote);
575 | try {
576 | get(local.getAbsolutePath(), remote);
577 | try {
578 | if (delete) {
579 | rmForced(remote);
580 | }
581 | } catch (final IOException eee) {
582 | Logger.warn("Warning: remote file or path " + remote + " cannot be removed.");
583 | }
584 | } catch (final IOException e) {
585 | if (e.getMessage().startsWith("550")) {
586 | // maybe it's a "not a plain file" error message", then it can
587 | // be a folder
588 | // test if this exists (then it should be a folder)
589 | if (isFolder(remote)) {
590 | // copy the whole directory
591 | exec("cd \"" + remote + "\";lmkdir \"" + remote + "\";lcd \"" + remote + "\"", true);
592 | // exec("mget *",true);
593 | try {
594 | for (final String element : list(".", false)) {
595 | retrieveFilesRecursively(element, delete);
596 | }
597 | } catch (final IOException ee) {
598 | }
599 | exec("cd ..;lcd ..", true);
600 | try {
601 | if (delete) {
602 | rmForced(remote);
603 | }
604 | } catch (final IOException eee) {
605 | Logger.warn("Warning: remote file or path " + remote + " cannot be removed.");
606 | }
607 | } else {
608 | Logger.warn("Error: remote file or path " + remote + " does not exist.");
609 | }
610 | } else {
611 | Logger.warn("Error: retrieving file " + remote + " failed. (" + e.getMessage() + ")");
612 | }
613 | }
614 | }
615 |
616 | /**
617 | * checks if path is a folder
618 | *
619 | * @param path
620 | * @return true if ftp-server changes to path
621 | */
622 | public boolean isFolder(final String path) {
623 | try {
624 | // /// try to parse LIST output (1 command)
625 | final entryInfo info = fileInfo(path);
626 | if (info != null) {
627 | return info.type == filetype.directory;
628 | }
629 |
630 | // /// try to change to folder (4 commands)
631 | // current folder
632 | final String currentFolder = pwd();
633 | // check if we can change to folder
634 | send("CWD " + path);
635 | final String reply = receive();
636 | if (isNotPositiveCompletion(reply)) {
637 | throw new IOException(reply);
638 | }
639 | // check if we actually changed into the folder
640 | final String changedPath = pwd();
641 | if (!(changedPath.equals(path) || changedPath.equals(currentFolder
642 | + (currentFolder.endsWith("/") ? "" : "/") + path))) {
643 | throw new IOException("folder is '" + changedPath + "' should be '" + path + "'");
644 | }
645 | // return to last folder
646 | send("CWD " + currentFolder);
647 | /*reply =*/ receive();
648 | return true;
649 | } catch (final IOException e) {
650 | return false;
651 | }
652 | }
653 |
654 | public boolean GLOB() {
655 | if (this.cmd.length != 1) {
656 | Logger.warn("Syntax: GLOB (no parameter)");
657 | return true;
658 | }
659 | this.glob = !this.glob;
660 | Logger.info("---- globbing is now turned " + ((this.glob) ? "ON" : "OFF"));
661 | return true;
662 | }
663 |
664 | public boolean HASH() {
665 | Logger.warn("no games implemented");
666 | return true;
667 | }
668 |
669 | /*
670 | * private static String[] shift(String args[]) { if ((args == null) ||
671 | * (args.length == 0)) return args; else { String[] newArgs = new
672 | * String[args.length-1]; System.arraycopy(args, 1, newArgs, 0,
673 | * args.length-1); return newArgs; } } public boolean JAR() { //Sun
674 | * proprietary API may be removed in a future Java release
675 | * sun.tools.jar.Main.main(shift(cmd)); return true; }
676 | */
677 |
678 | public boolean JJENCODE() {
679 | if (this.cmd.length != 2) {
680 | Logger.warn("Syntax: JJENCODE ");
681 | return true;
682 | }
683 | final String path = this.cmd[1];
684 |
685 | final File dir = new File(path);
686 | final File newPath = dir.isAbsolute() ? dir : new File(this.currentLocalPath, path);
687 | if (newPath.exists()) {
688 | if (newPath.isDirectory()) {
689 | // exec("cd \"" + remote + "\";lmkdir \"" + remote + "\";lcd \""
690 | // + remote + "\"",true);
691 | /*
692 | * if not exist %1\nul goto :error cd %1 c:\jdk1.2.2\bin\jar
693 | * -cfM0 ..\%1.jar *.* cd .. c:\jdk1.2.2\bin\jar -cfM %1.jj
694 | * %1.jar del %1.jar
695 | */
696 | String s = "";
697 | final String[] l = newPath.list();
698 | for (final String element : l) {
699 | s = s + " \"" + element + "\"";
700 | }
701 | exec("cd \"" + path + "\";jar -cfM0 ../\"" + path + ".jar\"" + s, true);
702 | exec("cd ..;jar -cfM \"" + path + ".jj\" \"" + path + ".jar\"", true);
703 | exec("rm \"" + path + ".jar\"", true);
704 | } else {
705 | Logger.warn("Error: local path " + newPath.toString() + " denotes not to a directory.");
706 | }
707 | } else {
708 | Logger.warn("Error: local path " + newPath.toString() + " does not exist.");
709 | }
710 | return true;
711 | }
712 |
713 | public boolean JJDECODE() {
714 | if (this.cmd.length != 2) {
715 | Logger.warn("Syntax: JJENCODE ");
716 | return true;
717 | }
718 | final String path = this.cmd[1];
719 | final File dir = new File(path);
720 | final File newPath = dir.isAbsolute() ? dir : new File(this.currentLocalPath, path);
721 | final File newFolder = new File(newPath.toString() + ".dir");
722 | if (newPath.exists()) {
723 | if (!newPath.isDirectory()) {
724 | if (!newFolder.mkdir()) {
725 | /*
726 | * if not exist %1.jj goto :error mkdir %1.dir copy %1.jj
727 | * %1.dir\ > %1.dummy && del %1.dummy cd %1.dir
728 | * c:\jdk1.2.2\bin\jar -xf %1.jj del %1.jj
729 | * c:\jdk1.2.2\bin\jar -xf %1.jar del %1.jar cd ..
730 | */
731 | exec("mkdir \"" + path + ".dir\"", true);
732 |
733 | } else {
734 | Logger.warn("Error: target dir " + newFolder.toString() + " cannot be created");
735 | }
736 | } else {
737 | Logger.warn("Error: local path " + newPath.toString() + " must denote to jar/jar file");
738 | }
739 | } else {
740 | Logger.warn("Error: local path " + newPath.toString() + " does not exist.");
741 | }
742 | return true;
743 | }
744 |
745 | private static String[] argList2StringArray(final String argList) {
746 | return argList.split("\\s");
747 | }
748 |
749 | public boolean JOIN(String[] args) {
750 |
751 | // make sure the specified dest file does not exist
752 | final String dest_name = args[1];
753 | final File dest_file = new File(dest_name);
754 | if (dest_file.exists()) {
755 | Logger.warn("join: destination file " + dest_name + " already exists");
756 | return true;
757 | }
758 |
759 | // prepare or search file names of the input files to be joined
760 | String source_name;
761 | File source_file;
762 | int pc = -1;
763 | // create new string array with file names
764 | // scan first for the files
765 | pc = 0;
766 | source_name = dest_name + ".000";
767 | String argString = "";
768 | source_file = new File(source_name);
769 | while ((source_file.exists()) && (source_file.isFile()) && (source_file.canRead())) {
770 | argString = argString + " " + source_name;
771 | pc++;
772 | source_name = dest_name + (pc < 10 ? ".00" + pc : (pc < 100 ? ".0" + pc : "." + pc));
773 | source_file = new File(source_name);
774 | }
775 | args = argList2StringArray(argString.substring(1));
776 |
777 | // do the join
778 | FileOutputStream dest = null;
779 | FileInputStream source = null;
780 | byte[] buffer;
781 | int bytes_read = 0;
782 |
783 | try {
784 | // open output file
785 | dest = new FileOutputStream(dest_file);
786 | buffer = new byte[1024];
787 |
788 | // append all source files
789 | for (pc = 0; pc < args.length; pc++) {
790 | // open the source file
791 | source_name = args[pc];
792 | source_file = new File(source_name);
793 | source = new FileInputStream(source_file);
794 |
795 | // start with the copy of one source file
796 | while (true) {
797 | bytes_read = source.read(buffer);
798 | if (bytes_read == -1) {
799 | break;
800 | }
801 | dest.write(buffer, 0, bytes_read);
802 | }
803 |
804 | // copy finished. close source file
805 | try {
806 | source.close();
807 | } catch (final IOException e) {
808 | }
809 | }
810 | // close the output file
811 | try {
812 | dest.close();
813 | } catch (final IOException e) {
814 | }
815 |
816 | // if we come to this point then everything went fine
817 | // if the user wanted to delete the source it is save to do so now
818 | for (pc = 0; pc < args.length; pc++) {
819 | try {
820 | if (!(new File(args[pc])).delete()) {
821 | Logger.warn("join: unable to delete file " + args[pc]);
822 | }
823 | } catch (final SecurityException e) {
824 | Logger.warn("join: no permission to delete file " + args[pc]);
825 | }
826 | }
827 | } catch (final FileNotFoundException e) {
828 | } catch (final IOException e) {
829 | }
830 |
831 | // clean up
832 | finally {
833 | // close any opened streams
834 | if (dest != null) {
835 | try {
836 | dest.close();
837 | } catch (final IOException e) {
838 | }
839 | }
840 | if (source != null) {
841 | try {
842 | source.close();
843 | } catch (final IOException e) {
844 | }
845 | }
846 |
847 | // print appropriate message
848 | Logger.warn("join created output from " + args.length + " source files");
849 | }
850 | return true;
851 | }
852 |
853 | public boolean COPY(final String[] args) {
854 | final File dest_file = new File(args[2]);
855 | if (dest_file.exists()) {
856 | Logger.warn("copy: destination file " + args[2] + " already exists");
857 | return true;
858 | }
859 | int bytes_read = 0;
860 | FileOutputStream dest = null;
861 | FileInputStream source = null;
862 | try {
863 | // open output file
864 | dest = new FileOutputStream(dest_file);
865 | final byte[] buffer = new byte[1024];
866 |
867 | // open the source file
868 | final File source_file = new File(args[1]);
869 | source = new FileInputStream(source_file);
870 |
871 | // start with the copy of one source file
872 | while (true) {
873 | bytes_read = source.read(buffer);
874 | if (bytes_read == -1) {
875 | break;
876 | }
877 | dest.write(buffer, 0, bytes_read);
878 | }
879 |
880 | } catch (final FileNotFoundException e) {
881 | } catch (final IOException e) {
882 | } finally {
883 | // copy finished. close source file
884 | if (source != null) {
885 | try {
886 | source.close();
887 | } catch (final IOException e) {
888 | }
889 | }
890 |
891 | // close the output file
892 | if (dest != null) {
893 | try {
894 | dest.close();
895 | } catch (final IOException e) {
896 | }
897 | }
898 | }
899 | return true;
900 | }
901 |
902 | public boolean JAVA() {
903 | String s = "JAVA";
904 | for (int i = 1; i < this.cmd.length; i++) {
905 | s = s + " " + this.cmd[i];
906 | }
907 | try {
908 | send(s);
909 | /* String reply = */receive();
910 | } catch (final IOException e) {
911 | }
912 | return true;
913 | }
914 |
915 | public boolean LCD() {
916 | if (this.cmd.length != 2) {
917 | Logger.warn("Syntax: LCD ");
918 | return true;
919 | }
920 | final String path = this.cmd[1];
921 | final File dir = new File(path);
922 | File newPath = dir.isAbsolute() ? dir : new File(this.currentLocalPath, path);
923 | try {
924 | newPath = new File(newPath.getCanonicalPath());
925 | } catch (final IOException e) {
926 | }
927 | if (newPath.exists()) {
928 | if (newPath.isDirectory()) {
929 | this.currentLocalPath = newPath;
930 | Logger.info("---- New local path: " + this.currentLocalPath.toString());
931 | } else {
932 | Logger.warn("Error: local path " + newPath.toString() + " denotes not a directory.");
933 | }
934 | } else {
935 | Logger.warn("Error: local path " + newPath.toString() + " does not exist.");
936 | }
937 | return true;
938 | }
939 |
940 | public boolean LDEL() {
941 | return LRM();
942 | }
943 |
944 | public boolean LDIR() {
945 | if (this.cmd.length != 1) {
946 | Logger.warn("Syntax: LDIR (no parameter)");
947 | return true;
948 | }
949 | final String[] name = this.currentLocalPath.list();
950 | for (final String element : name) {
951 | Logger.info(ls(new File(this.currentLocalPath, element)));
952 | }
953 | return true;
954 | }
955 |
956 | /**
957 | * parse LIST of file
958 | *
959 | * @param path
960 | * on ftp-server
961 | * @return null if info cannot be determined or error occures
962 | */
963 | public entryInfo fileInfo(final String path) {
964 | if (this.infoCache.containsKey(path)) {
965 | return this.infoCache.get(path);
966 | }
967 | try {
968 | /*
969 | * RFC959 page 33f: If the argument is a pathname, the command is
970 | * analogous to the "list" command except that data shall be
971 | * transferred over the control connection.
972 | */
973 | send("STAT " + path);
974 |
975 | final String reply = receive();
976 | if (isNotPositiveCompletion(reply)) {
977 | throw new IOException(reply);
978 | }
979 |
980 | // check if reply is correct multi-line reply
981 | final String[] lines = reply.split("\\r\\n");
982 | if (lines.length < 3) {
983 | throw new IOException(reply);
984 | }
985 | final int startCode = getStatusCode(lines[0]);
986 | final int endCode = getStatusCode(lines[lines.length - 1]);
987 | if (startCode != endCode) {
988 | throw new IOException(reply);
989 | }
990 |
991 | // first line which gives a result is taken (should be only one)
992 | entryInfo info = null;
993 | final int endFor = lines.length - 1;
994 | for (int i = 1; i < endFor; i++) {
995 | info = parseListData(lines[i]);
996 | if (info != null) {
997 | this.infoCache.put(path, info);
998 | break;
999 | }
1000 | }
1001 | return info;
1002 | } catch (final IOException e) {
1003 | return null;
1004 | }
1005 | }
1006 |
1007 | /**
1008 | * returns status of reply
1009 | *
1010 | * 1 Positive Preliminary reply 2 Positive Completion reply 3 Positive
1011 | * Intermediate reply 4 Transient Negative Completion reply 5 Permanent
1012 | * Negative Completion reply
1013 | *
1014 | * @param reply
1015 | * @return first digit of the reply code
1016 | */
1017 | private int getStatus(final String reply) {
1018 | return Integer.parseInt(reply.substring(0, 1));
1019 | }
1020 |
1021 | /**
1022 | * gives reply code
1023 | *
1024 | * @param reply
1025 | * @return
1026 | */
1027 | private int getStatusCode(final String reply) {
1028 | return Integer.parseInt(reply.substring(0, 3));
1029 | }
1030 |
1031 | /**
1032 | * checks if status code is in group 2 ("2xx message")
1033 | *
1034 | * @param reply
1035 | * @return
1036 | */
1037 | private boolean isNotPositiveCompletion(final String reply) {
1038 | return getStatus(reply) != 2;
1039 | }
1040 |
1041 | private final static Pattern lsStyle = Pattern.compile("^([-\\w]{10}).\\s*\\d+\\s+[-\\w]+\\s+[-\\w]+\\s+(\\d+)\\s+(\\w{3})\\s+(\\d+)\\s+(\\d+:?\\d*)\\s+(.*)$");
1042 |
1043 | /**
1044 | * parses output of LIST from ftp-server currently UNIX ls-style only, ie:
1045 | * -rw-r--r-- 1 root other 531 Jan 29 03:26 README dr-xr-xr-x 2 root 512 Apr
1046 | * 8 1994 etc
1047 | *
1048 | * @param line
1049 | * @return null if not parseable
1050 | */
1051 | private static entryInfo parseListData(final String line) {
1052 | // groups: 1: rights, 2: size, 3: month, 4: day, 5: time or year, 6: name
1053 | final Matcher tokens = lsStyle.matcher(line);
1054 | if (tokens.matches() && tokens.groupCount() == 6) {
1055 | filetype type = filetype.file;
1056 | if (tokens.group(1).startsWith("d")) type = filetype.directory;
1057 | if (tokens.group(1).startsWith("l")) type = filetype.link;
1058 | long size = -1;
1059 | try {
1060 | size = Long.parseLong(tokens.group(2));
1061 | } catch (final NumberFormatException e) {
1062 | Logger.warn("not a number in list-entry: ", e);
1063 | return null;
1064 | }
1065 | String time;
1066 | String year;
1067 | if (tokens.group(5).contains(":")) {
1068 | time = tokens.group(5);
1069 | year = String.valueOf(Calendar.getInstance().get(Calendar.YEAR)); // current
1070 | // year
1071 | } else {
1072 | time = "00:00";
1073 | year = tokens.group(5);
1074 | }
1075 | // construct date string
1076 | // this has to be done, because the list-entry may have multiple
1077 | // spaces, tabs or so
1078 | Date date;
1079 | final String dateString = tokens.group(3) + " " + tokens.group(4) + " " + year + " " + time;
1080 | try {
1081 | synchronized(lsDateFormat) {
1082 | date = lsDateFormat.parse(dateString);
1083 | }
1084 | } catch (final ParseException e) {
1085 | Logger.warn("---- Error: not ls date-format '" + dateString, e);
1086 | date = new Date();
1087 | }
1088 | final String filename = tokens.group(6);
1089 | return new entryInfo(type, size, date, filename);
1090 | }
1091 | return null;
1092 | }
1093 |
1094 |
// Shared entry built with the no-argument constructor (type file, size -1, no date, no name).
// NOTE(review): by its name presumably an end-of-stream marker for consumers of entryInfo objects — confirm against callers.
public static final entryInfo POISON_entryInfo = new entryInfo();
1096 |
/**
 * Kind of a directory listing entry. parseListData() assigns directory for
 * rights strings starting with "d", link for those starting with "l" and
 * file otherwise.
 */
public static enum filetype {
    file, link, directory;
}
1100 |
1101 | /**
1102 | * parameter class
1103 | *
1104 | * @author danielr
1105 | * @since 2008-03-13 r4558
1106 | */
1107 | public static class entryInfo {
1108 | /**
1109 | * file type
1110 | */
1111 | public final filetype type;
1112 | /**
1113 | * size in bytes
1114 | */
1115 | public final long size;
1116 | /**
1117 | * date of file
1118 | */
1119 | public final Date date;
1120 | /**
1121 | * name of entry
1122 | */
1123 | public String name;
1124 |
1125 | public entryInfo() {
1126 | this.type = filetype.file;
1127 | this.size = -1;
1128 | this.date = null;
1129 | this.name = null;
1130 | }
1131 |
1132 | /**
1133 | * constructor
1134 | *
1135 | * @param isDir
1136 | * @param size
1137 | * bytes
1138 | * @param date
1139 | * @param name
1140 | */
1141 | public entryInfo(final filetype type, final long size, final Date date, final String name) {
1142 | this.type = type;
1143 | this.size = size;
1144 | this.date = date;
1145 | this.name = name;
1146 | }
1147 |
1148 | /*
1149 | * (non-Javadoc)
1150 | *
1151 | * @see java.lang.Object#toString()
1152 | */
1153 | @Override
1154 | public String toString() {
1155 | final StringBuilder info = new StringBuilder(100);
1156 | info.append(this.name);
1157 | info.append(" (type=");
1158 | info.append(this.type.name());
1159 | info.append(", size=");
1160 | info.append(this.size);
1161 | info.append(", ");
1162 | info.append(this.date);
1163 | info.append(")");
1164 | return info.toString();
1165 | }
1166 | }
1167 |
1168 | private String ls(final File inode) {
1169 | if ((inode == null) || (!inode.exists())) {
1170 | return "";
1171 | }
1172 | String s = "";
1173 | if (inode.isDirectory()) {
1174 | s = s + "d";
1175 | } else if (inode.isFile()) {
1176 | s = s + "-";
1177 | } else {
1178 | s = s + "?";
1179 | }
1180 | if (inode.canRead()) {
1181 | s = s + "r";
1182 | } else {
1183 | s = s + "-";
1184 | }
1185 | if (inode.canWrite()) {
1186 | s = s + "w";
1187 | } else {
1188 | s = s + "-";
1189 | }
1190 | s = s + " " + lenformatted(Long.toString(inode.length()), 9);
1191 | final DateFormat df = DateFormat.getDateTimeInstance();
1192 | s = s + " " + df.format(new Date(inode.lastModified()));
1193 | s = s + " " + inode.getName();
1194 | if (inode.isDirectory()) {
1195 | s = s + "/";
1196 | }
1197 | return s;
1198 | }
1199 |
1200 | private String lenformatted(String s, int l) {
1201 | l = l - s.length();
1202 | while (l > 0) {
1203 | s = " " + s;
1204 | l--;
1205 | }
1206 | return s;
1207 | }
1208 |
1209 | public boolean LITERAL() {
1210 | if (this.cmd.length == 1) {
1211 | Logger.warn("Syntax: LITERAL [] (see RFC959)");
1212 | return true;
1213 | }
1214 | String s = "";
1215 | for (int i = 1; i < this.cmd.length; i++) {
1216 | s = s + " " + this.cmd[i];
1217 | }
1218 | try {
1219 | literal(s.substring(1));
1220 | } catch (final IOException e) {
1221 | Logger.warn("Error: Syntax of FTP-command wrong. See RFC959 for details.");
1222 | }
1223 | return true;
1224 | }
1225 |
/**
 * Shell command LLS: alias, delegates to LDIR().
 *
 * @return whatever LDIR() returns
 */
public boolean LLS() {
    return LDIR();
}
1229 |
/**
 * Shell command LMD: alias, delegates to LMKDIR().
 *
 * @return whatever LMKDIR() returns
 */
public boolean LMD() {
    return LMKDIR();
}
1233 |
1234 | public boolean LMKDIR() {
1235 | if (this.cmd.length != 2) {
1236 | Logger.warn("Syntax: LMKDIR ");
1237 | return true;
1238 | }
1239 | final File f = new File(this.currentLocalPath, this.cmd[1]);
1240 | if (f.exists()) {
1241 | Logger.warn("Error: local file/folder " + this.cmd[1] + " already exists");
1242 | } else {
1243 | if (!f.mkdir()) {
1244 | Logger.warn("Error: creation of local folder " + this.cmd[1] + " failed");
1245 | }
1246 | }
1247 | return true;
1248 | }
1249 |
1250 | public boolean LMV() {
1251 | if (this.cmd.length != 3) {
1252 | Logger.warn("Syntax: LMV ");
1253 | return true;
1254 | }
1255 | final File from = new File(this.cmd[1]);
1256 | final File to = new File(this.cmd[2]);
1257 | if (!to.exists()) {
1258 | if (from.renameTo(to)) {
1259 | Logger.info("---- \"" + from.toString() + "\" renamed to \"" + to.toString() + "\"");
1260 | } else {
1261 | Logger.warn("rename failed");
1262 | }
1263 | } else {
1264 | Logger.warn("\"" + to.toString() + "\" already exists");
1265 | }
1266 | return true;
1267 | }
1268 |
/**
 * Shell command LPWD: logs the current local path. Takes no parameter.
 *
 * @return always true
 */
public boolean LPWD() {
    if (this.cmd.length != 1) {
        Logger.warn("Syntax: LPWD (no parameter)");
        return true;
    }
    Logger.info("---- Local path: " + this.currentLocalPath.toString());
    return true;
}
1277 |
1278 | public boolean LRD() {
1279 | return LMKDIR();
1280 | }
1281 |
1282 | public boolean LRMDIR() {
1283 | if (this.cmd.length != 2) {
1284 | Logger.warn("Syntax: LRMDIR ");
1285 | return true;
1286 | }
1287 | final File f = new File(this.currentLocalPath, this.cmd[1]);
1288 | if (!f.exists()) {
1289 | Logger.warn("Error: local folder " + this.cmd[1] + " does not exist");
1290 | } else {
1291 | if (!f.delete()) {
1292 | Logger.warn("Error: deletion of local folder " + this.cmd[1] + " failed");
1293 | }
1294 | }
1295 | return true;
1296 | }
1297 |
1298 | public boolean LRM() {
1299 | if (this.cmd.length != 2) {
1300 | Logger.warn("Syntax: LRM ");
1301 | return true;
1302 | }
1303 | final File f = new File(this.currentLocalPath, this.cmd[1]);
1304 | if (!f.exists()) {
1305 | Logger.warn("Error: local file " + this.cmd[1] + " does not exist");
1306 | } else {
1307 | if (!f.delete()) {
1308 | Logger.warn("Error: deletion of file " + this.cmd[1] + " failed");
1309 | }
1310 | }
1311 | return true;
1312 | }
1313 |
1314 | public boolean LS() {
1315 | if (this.cmd.length > 2) {
1316 | Logger.warn("Syntax: LS [|]");
1317 | return true;
1318 | }
1319 | if (notConnected()) {
1320 | return LLS();
1321 | }
1322 | try {
1323 | List l;
1324 | if (this.cmd.length == 2) {
1325 | l = list(this.cmd[1], true);
1326 | } else {
1327 | l = list(".", true);
1328 | }
1329 | printElements(l);
1330 | } catch (final IOException e) {
1331 | Logger.warn("Error: remote list not available (2): " + e.getMessage());
1332 | }
1333 | return true;
1334 | }
1335 |
1336 | /**
1337 | * @param list
1338 | */
1339 | private void printElements(final List list) {
1340 | Logger.info("---- v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v---v");
1341 | for (final String element : list) {
1342 | Logger.info(element);
1343 | }
1344 | Logger.info("---- ^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^---^");
1345 | }
1346 |
1347 | public List list(final String path, final boolean extended) throws IOException {
1348 |
1349 | createDataSocket();
1350 |
1351 | send("CWD " + path);
1352 | String reply = receive();
1353 | // get status code
1354 | int status = getStatus(reply);
1355 | if (status > 2) {
1356 | throw new IOException(reply);
1357 | }
1358 |
1359 | // send command to the control port
1360 | if (extended) {
1361 | send("LIST");
1362 | } else {
1363 | send("NLST");
1364 | }
1365 |
1366 | // read status of the command from the control port
1367 | reply = receive();
1368 |
1369 | // get status code
1370 | status = getStatus(reply);
1371 | if (status != 1) {
1372 | throw new IOException(reply);
1373 | }
1374 |
1375 | // starting data transaction
1376 | final Socket dataSocket = getDataSocket();
1377 | final BufferedReader dataStream = new BufferedReader(new InputStreamReader(dataSocket.getInputStream()));
1378 |
1379 | // read file system data
1380 | String line;
1381 | final ArrayList files = new ArrayList();
1382 | try {
1383 | while ((line = dataStream.readLine()) != null) {
1384 | if (!line.startsWith("total ")) {
1385 | files.add(line);
1386 | }
1387 | }
1388 | } catch (final IOException e1) {
1389 | e1.printStackTrace();
1390 | } finally {try {
1391 | // shutdown data connection
1392 | dataStream.close(); // Closing the returned InputStream will
1393 | closeDataSocket(); // close the associated socket.
1394 | } catch (final IOException e) {
1395 | e.printStackTrace();
1396 | }}
1397 | // after stream is empty we should get control completion echo
1398 | reply = receive();
1399 | //System.out.println("reply of LIST: " + reply);
1400 | // boolean success = !isNotPositiveCompletion(reply);
1401 | //for (String s: files) System.out.println("FILES of '" + path + "': " + s);
1402 |
1403 | files.trimToSize();
1404 | return files;
1405 | }
1406 |
/**
 * Shell command MDIR: alias, delegates to MKDIR().
 *
 * @return whatever MKDIR() returns
 */
public boolean MDIR() {
    return MKDIR();
}
1410 |
1411 | public boolean MKDIR() {
1412 | if (this.cmd.length != 2) {
1413 | Logger.warn("Syntax: MKDIR ");
1414 | return true;
1415 | }
1416 | if (notConnected()) {
1417 | return LMKDIR();
1418 | }
1419 | try {
1420 | // send mkdir command
1421 | send("MKD " + this.cmd[1]);
1422 | // read reply
1423 | final String reply = receive();
1424 | if (isNotPositiveCompletion(reply)) {
1425 | throw new IOException(reply);
1426 | }
1427 | } catch (final IOException e) {
1428 | Logger.warn("Error: creation of folder " + this.cmd[1] + " failed");
1429 | }
1430 | return true;
1431 | }
1432 |
/**
 * Shell command MGET: retrieves the remote entries matching the pattern in
 * cmd[1] by calling mget(pattern, false). Failures are only logged.
 *
 * @return always true
 */
public boolean MGET() {
    if (this.cmd.length != 2) {
        Logger.warn("Syntax: MGET ");
        return true;
    }
    try {
        // second argument false: mget() passes it on as the "remove" flag
        mget(this.cmd[1], false);
    } catch (final IOException e) {
        Logger.warn("Error: mget failed (" + e.getMessage() + ")");
    }
    return true;
}
1445 |
1446 | private void mget(final String pattern, final boolean remove) throws IOException {
1447 | final List l = list(".", false);
1448 | File local;
1449 | for (final String remote : l) {
1450 | if (matches(remote, pattern)) {
1451 | local = new File(this.currentLocalPath, remote);
1452 | if (local.exists()) {
1453 | Logger.warn("Warning: local file " + local.toString() + " overwritten.");
1454 | if(!local.delete())
1455 | Logger.warn("Warning: local file " + local.toString() + " could not be deleted.");
1456 | }
1457 | retrieveFilesRecursively(remote, remove);
1458 | }
1459 | }
1460 | }
1461 |
/**
 * Shell command MOVEDOWN: like MGET, but calls mget(pattern, true).
 * Failures are only logged.
 *
 * @return always true
 */
public boolean MOVEDOWN() {
    if (this.cmd.length != 2) {
        Logger.warn("Syntax: MOVEDOWN ");
        return true;
    }
    try {
        // second argument true: mget() passes it on as the "remove" flag
        mget(this.cmd[1], true);
    } catch (final IOException e) {
        Logger.warn("Error: movedown failed (" + e.getMessage() + ")");
    }
    return true;
}
1474 |
1475 | /**
1476 | * public boolean MOVEUP() { }
1477 | *
1478 | * @return
1479 | */
1480 | public boolean MV() {
1481 | if (this.cmd.length != 3) {
1482 | Logger.warn("Syntax: MV ");
1483 | return true;
1484 | }
1485 | if (notConnected()) {
1486 | return LMV();
1487 | }
1488 | try {
1489 | // send rename commands
1490 | send("RNFR " + this.cmd[1]);
1491 | // read reply
1492 | String reply = receive();
1493 | if (isNotPositiveCompletion(reply)) {
1494 | throw new IOException(reply);
1495 | }
1496 | send("RNTO " + this.cmd[2]);
1497 | // read reply
1498 | reply = receive();
1499 | if (isNotPositiveCompletion(reply)) {
1500 | throw new IOException(reply);
1501 | }
1502 | } catch (final IOException e) {
1503 | Logger.warn("Error: rename of " + this.cmd[1] + " to " + this.cmd[2] + " failed.");
1504 | }
1505 | return true;
1506 | }
1507 |
/**
 * Shell command NOOP: passes "NOOP" to literal(). Takes no parameter.
 *
 * @return always true
 */
public boolean NOOP() {
    if (this.cmd.length != 1) {
        Logger.warn("Syntax: NOOP (no parameter)");
        return true;
    }
    try {
        literal("NOOP");
    } catch (final IOException e) {
        Logger.warn("Error: server does not know how to do nothing");
    }
    return true;
}
1520 |
1521 | public boolean OPEN() {
1522 | if ((this.cmd.length < 2) || (this.cmd.length > 3)) {
1523 | Logger.warn("Syntax: OPEN []");
1524 | return true;
1525 | }
1526 | int port = 21;
1527 | if (this.cmd.length == 3) {
1528 | try {
1529 | port = java.lang.Integer.parseInt(this.cmd[2]);
1530 | } catch (final NumberFormatException e) {
1531 | port = 21;
1532 | }
1533 | }
1534 | if (this.cmd[1].indexOf(':',0) > 0) {
1535 | // port is given
1536 | port = java.lang.Integer.parseInt(this.cmd[1].substring(this.cmd[1].indexOf(':',0) + 1));
1537 | this.cmd[1] = this.cmd[1].substring(0, this.cmd[1].indexOf(':',0));
1538 | }
1539 | try {
1540 | open(this.cmd[1], port);
1541 | Logger.info("---- Connection to " + this.cmd[1] + " established.");
1542 | this.prompt = "ftp [" + this.cmd[1] + "]>";
1543 | } catch (final IOException e) {
1544 | Logger.warn("Error: connecting " + this.cmd[1] + " on port " + port + " failed: " + e.getMessage());
1545 | }
1546 | return true;
1547 | }
1548 |
1549 | public void open(final String host, final int port) throws IOException {
1550 | if (this.ControlSocket != null) {
1551 | exec("close", false); // close any existing connections first
1552 | }
1553 |
1554 | try {
1555 | this.ControlSocket = new Socket();
1556 | this.ControlSocket.setSoTimeout(getTimeout());
1557 | this.ControlSocket.setKeepAlive(true);
1558 | this.ControlSocket.setTcpNoDelay(true); // no accumulation until buffer is full
1559 | this.ControlSocket.setSoLinger(false, getTimeout()); // !wait for all data being written on close()
1560 | this.ControlSocket.setSendBufferSize(1440); // read http://www.cisco.com/warp/public/105/38.shtml
1561 | this.ControlSocket.setReceiveBufferSize(1440); // read http://www.cisco.com/warp/public/105/38.shtml
1562 | this.ControlSocket.connect(new InetSocketAddress(host, port), 1000);
1563 | this.clientInput = new BufferedReader(new InputStreamReader(this.ControlSocket.getInputStream()));
1564 | this.clientOutput = new DataOutputStream(new BufferedOutputStream(this.ControlSocket.getOutputStream()));
1565 |
1566 | // read and return server message
1567 | this.host = host;
1568 | this.port = port;
1569 | this.remotemessage = receive();
1570 | if ((this.remotemessage != null) && (this.remotemessage.length() > 3)) {
1571 | this.remotemessage = this.remotemessage.substring(4);
1572 | }
1573 | } catch (final IOException e) {
1574 | // if a connection was opened, it should not be used
1575 | closeConnection();
1576 | throw new IOException(e.getMessage());
1577 | }
1578 | }
1579 |
/**
 * Tells whether there is no control connection.
 *
 * @return true if the control socket field is null
 */
public boolean notConnected() {
    return this.ControlSocket == null;
}
1586 |
1587 | /**
1588 | * close all sockets
1589 | *
1590 | * @throws IOException
1591 | */
1592 | private void closeConnection() throws IOException {
1593 | // cleanup
1594 | if (this.clientOutput != null) this.clientOutput.close();
1595 | if (this.clientInput != null) this.clientInput.close();
1596 | if (this.ControlSocket != null) this.ControlSocket.close();
1597 | if (this.DataSocketActive != null) this.DataSocketActive.close();
1598 | if (this.DataSocketPassive != null) this.DataSocketPassive.close();
1599 | }
1600 |
/**
 * Shell command PROMPT: has no effect, only logs that prompting is off.
 *
 * @return always true
 */
public boolean PROMPT() {
    Logger.warn("prompt is always off");
    return true;
}
1605 |
1606 | public boolean PUT() {
1607 | if ((this.cmd.length < 2) || (this.cmd.length > 3)) {
1608 | Logger.warn("Syntax: PUT []");
1609 | return true;
1610 | }
1611 | final File local = new File(this.currentLocalPath, this.cmd[1]);
1612 | final String remote = (this.cmd.length == 2) ? local.getName() : this.cmd[2];
1613 | if (!local.exists()) {
1614 | Logger.warn("Error: local file " + local.toString() + " does not exist.");
1615 | Logger.warn(" Remote file " + remote + " not overwritten.");
1616 | } else {
1617 | try {
1618 | put(local.getAbsolutePath(), remote);
1619 | } catch (final IOException e) {
1620 | Logger.warn("Error: transmitting file " + local.toString() + " failed.");
1621 | }
1622 | }
1623 | return true;
1624 | }
1625 |
/**
 * Shell command PWD: logs the current remote path as reported by pwd(), or
 * delegates to LPWD() when there is no connection. Takes no parameter.
 *
 * @return true, or the result of LPWD() when not connected
 */
public boolean PWD() {
    if (this.cmd.length > 1) {
        Logger.warn("Syntax: PWD (no parameter)");
        return true;
    }
    if (notConnected()) {
        return LPWD();
    }
    try {
        Logger.info("---- Current remote path is: " + pwd());
    } catch (final IOException e) {
        Logger.warn("Error: remote path not available");
    }
    return true;
}
1641 |
1642 | private String pwd() throws IOException {
1643 | // send pwd command
1644 | send("PWD");
1645 |
1646 | // read current directory
1647 | final String reply = receive();
1648 | if (isNotPositiveCompletion(reply)) {
1649 | throw new IOException(reply);
1650 | }
1651 |
1652 | // parse directory name out of the reply
1653 | return reply.substring(5, reply.lastIndexOf('"'));
1654 | }
1655 |
/**
 * Shell command REMOTEHELP: passes "HELP" to literal(). Takes no parameter.
 *
 * @return always true
 */
public boolean REMOTEHELP() {
    if (this.cmd.length != 1) {
        Logger.warn("Syntax: REMOTEHELP (no parameter)");
        return true;
    }
    try {
        literal("HELP");
    } catch (final IOException e) {
        Logger.warn("Error: remote help not supported by server.");
    }
    return true;
}
1668 |
/**
 * Shell command RMDIR: removes the remote folder cmd[1] via rmForced(), or
 * delegates to LRMDIR() when there is no connection. Failures are only
 * logged.
 *
 * @return true, or the result of LRMDIR() when not connected
 */
public boolean RMDIR() {
    if (this.cmd.length != 2) {
        Logger.warn("Syntax: RMDIR ");
        return true;
    }
    if (notConnected()) {
        return LRMDIR();
    }
    try {
        rmForced(this.cmd[1]);
    } catch (final IOException e) {
        Logger.warn("Error: deletion of folder " + this.cmd[1] + " failed.");
    }
    return true;
}
1684 |
/**
 * Shell command QUIT: executes "close" if a connection exists.
 *
 * @return always false, unlike the other shell commands in this part of the
 *         class, which return true — presumably the signal for the caller
 *         to stop processing commands (TODO confirm against exec())
 */
public boolean QUIT() {
    if (!notConnected()) {
        exec("close", false);
    }
    return false;
}
1691 |
/**
 * Shell command RECV: alias, delegates to GET().
 *
 * @return whatever GET() returns
 */
public boolean RECV() {
    return GET();
}
1695 |
1696 | /**
1697 | * size of file on ftp-server (maybe size of directory-entry is possible)
1698 | *
1699 | * @param path
1700 | * @return size in bytes or -1 if size cannot be determinied
1701 | */
1702 | public long fileSize(final String path) {
1703 | long size = -1;
1704 | try {
1705 | // extended FTP
1706 | size = size(path);
1707 | } catch (final IOException e) {
1708 | // else with LIST-data
1709 | final entryInfo info = fileInfo(path);
1710 | if (info != null) {
1711 | size = info.size;
1712 | }
1713 | }
1714 | return size;
1715 | }
1716 |
1717 | public int size(final String path) throws IOException {
1718 | // get the size of a file. If the given path targets to a directory, a
1719 | // -1 is returned
1720 | // this function is not supported by standard rfc 959. The method is
1721 | // descibed in RFC 3659 Extensions to FTP
1722 | // if the method is not supported by the target server, this throws an
1723 | // IOException with the
1724 | // server response as exception message
1725 |
1726 | // send command to the control port
1727 | send("SIZE " + path);
1728 |
1729 | // read status of the command from the control port
1730 | final String reply = receive();
1731 |
1732 | if (getStatusCode(reply) != 213) {
1733 | throw new IOException(reply);
1734 | }
1735 |
1736 | try {
1737 | return Integer.parseInt(reply.substring(4));
1738 | } catch (final NumberFormatException e) {
1739 | throw new IOException(reply);
1740 | }
1741 | }
1742 |
/**
 * Shell command USER: calls login() with cmd[1] and cmd[2] and logs the
 * outcome.
 *
 * @return always true
 */
public boolean USER() {
    if (this.cmd.length != 3) {
        Logger.warn("Syntax: USER ");
        return true;
    }
    try {
        login(this.cmd[1], this.cmd[2]);
        Logger.info("---- Granted access for user " + this.cmd[1] + ".");
    } catch (final IOException e) {
        Logger.warn("Error: authorization of user " + this.cmd[1] + " failed: " + e.getMessage());
    }
    return true;
}
1756 |
/**
 * Shell command APPEND: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean APPEND() {
    Logger.warn("not yet supported");
    return true;
}
1761 |
1762 | public boolean HELP() {
1763 | Logger.info("---- ftp HELP ----");
1764 | Logger.info("");
1765 | Logger.info("This ftp client shell can act as command shell for the local host as well for the");
1766 | Logger.info("remote host. Commands that point to the local host are preceded by 'L'.");
1767 | Logger.info("");
1768 | Logger.info("Supported Commands:");
1769 | Logger.info("ASCII");
1770 | Logger.info(" switch remote server to ASCII transfer mode");
1771 | Logger.info("BINARY");
1772 | Logger.info(" switch remote server to BINARY transfer mode");
1773 | Logger.info("BYE");
1774 | Logger.info(" quit the command shell (same as EXIT)");
1775 | Logger.info("CD ");
1776 | Logger.info(" change remote path");
1777 | Logger.info("CLOSE");
1778 | Logger.info(" close connection to remote host (same as DISCONNECT)");
1779 | Logger.info("DEL ");
1780 | Logger.info(" delete file on remote server (same as RM)");
1781 | Logger.info("RM ");
1782 | Logger.info(" remove file from remote server (same as DEL)");
1783 | Logger.info("DIR [|] ");
1784 | Logger.info(" print file information for remote directory or file");
1785 | Logger.info("DISCONNECT");
1786 | Logger.info(" disconnect from remote server (same as CLOSE)");
1787 | Logger.info("EXIT");
1788 | Logger.info(" quit the command shell (same as BYE)");
1789 | Logger.info("GET []");
1790 | Logger.info(" load from remote server and store it locally,");
1791 | Logger.info(" optionally to . if the is a directory,");
1792 | Logger.info(" then all files in that directory are retrieved,");
1793 | Logger.info(" including recursively all subdirectories.");
1794 | Logger.info("GLOB");
1795 | Logger.info(" toggles globbing: matching with wild cards or not");
1796 | Logger.info("COPY");
1797 | Logger.info(" copies local files");
1798 | Logger.info("LCD ");
1799 | Logger.info(" local directory change");
1800 | Logger.info("LDEL ");
1801 | Logger.info(" local file delete");
1802 | Logger.info("LDIR");
1803 | Logger.info(" shows local directory content");
1804 | Logger.info("LITERAL []");
1805 | Logger.info(" Sends FTP commands as documented in RFC959");
1806 | Logger.info("LLS");
1807 | Logger.info(" as LDIR");
1808 | Logger.info("LMD");
1809 | Logger.info(" as LMKDIR");
1810 | Logger.info("LMV ");
1811 | Logger.info(" copies local files");
1812 | Logger.info("LPWD");
1813 | Logger.info(" prints local path");
1814 | Logger.info("LRD");
1815 | Logger.info(" as LMKDIR");
1816 | Logger.info("LRMD ");
1817 | Logger.info(" deletes local directory ");
1818 | Logger.info("LRM ");
1819 | Logger.info(" deletes local file ");
1820 | Logger.info("LS [|]");
1821 | Logger.info(" prints list of remote directory or information of file ");
1822 | Logger.info("MDIR");
1823 | Logger.info(" as MKDIR");
1824 | Logger.info("MGET ");
1825 | Logger.info(" copies files from remote server that fits into the");
1826 | Logger.info(" pattern to the local path.");
1827 | Logger.info("MOVEDOWN ");
1828 | Logger.info(" copies files from remote server as with MGET");
1829 | Logger.info(" and deletes them afterwards on the remote server");
1830 | Logger.info("MV ");
1831 | Logger.info(" moves or renames files on the local host");
1832 | Logger.info("NOOP");
1833 | Logger.info(" sends the NOOP command to the remote server (which does nothing)");
1834 | Logger.info(" This command is usually used to measure the speed of the remote server.");
1835 | Logger.info("OPEN []");
1836 | Logger.info(" connects the ftp shell to the remote server . Optionally,");
1837 | Logger.info(" a port number can be given, the default port number is 21.");
1838 | Logger.info(" Example: OPEN localhost:2121 or OPEN 192.168.0.1 2121");
1839 | Logger.info("PROMPT");
1840 | Logger.info(" compatibility command, that usually toggles beween prompting on or off.");
1841 | Logger.info(" ftp has prompting switched off by default and cannot switched on.");
1842 | Logger.info("PUT []");
1843 | Logger.info(" copies the to the remote server to the current remote path or");
1844 | Logger.info(" optionally to the given path.");
1845 | Logger.info("PWD");
1846 | Logger.info(" prints current path on the remote server.");
1847 | Logger.info("REMOTEHELP");
1848 | Logger.info(" asks the remote server to print the help text of the remote server");
1849 | Logger.info("RMDIR ");
1850 | Logger.info(" removes the directory on the remote server");
1851 | Logger.info("QUIT");
1852 | Logger.info(" exits the ftp application");
1853 | Logger.info("RECV");
1854 | Logger.info(" as GET");
1855 | Logger.info("USER ");
1856 | Logger.info(" Loggers into the remote server with the user ");
1857 | Logger.info(" and the password ");
1858 | Logger.info("");
1859 | Logger.info("");
1860 | Logger.info("EXAMPLE:");
1861 | Logger.info("a standard sessions looks like this");
1862 | Logger.info(">open 192.168.0.1:2121");
1863 | Logger.info(">user anonymous bob");
1864 | Logger.info(">pwd");
1865 | Logger.info(">ls");
1866 | Logger.info(">.....");
1867 | Logger.info("");
1868 | Logger.info("");
1869 | return true;
1870 | }
1871 |
/**
 * Shell command QUOTE: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean QUOTE() {
    Logger.warn("not yet supported");
    return true;
}
1876 |
/**
 * Shell command BELL: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean BELL() {
    Logger.warn("not yet supported");
    return true;
}
1881 |
/**
 * Shell command MDELETE: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean MDELETE() {
    Logger.warn("not yet supported");
    return true;
}
1886 |
/**
 * Shell command SEND: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean SEND() {
    Logger.warn("not yet supported");
    return true;
}
1891 |
/**
 * Shell command DEBUG: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean DEBUG() {
    Logger.warn("not yet supported");
    return true;
}
1896 |
/**
 * Shell command MLS: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean MLS() {
    Logger.warn("not yet supported");
    return true;
}
1901 |
/**
 * Shell command TRACE: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean TRACE() {
    Logger.warn("not yet supported");
    return true;
}
1906 |
/**
 * Shell command MPUT: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean MPUT() {
    Logger.warn("not yet supported");
    return true;
}
1911 |
/**
 * Shell command TYPE: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean TYPE() {
    Logger.warn("not yet supported");
    return true;
}
1916 |
/**
 * Shell command CREATE: not implemented, only logs a warning.
 *
 * @return always true
 */
public boolean CREATE() {
    Logger.warn("not yet supported");
    return true;
}
1921 |
1922 | // helper functions
1923 |
/**
 * Checks whether a name matches a wildcard pattern. When globbing is
 * switched off (this.glob is false) only exact equality counts.
 *
 * @param name
 *            the string to test
 * @param pattern
 *            pattern in which '*' stands for any number of characters (also
 *            none) and '?' for exactly one character
 * @return true if the name matches; false also when an index runs out of
 *         bounds during the comparison
 */
private boolean matches(final String name, final String pattern) {
    // checks whether the string name matches with the pattern
    // the pattern may contain characters '*' as wildcard for several
    // characters (also none) and '?' to match exactly one characters
    // Logger.info("MATCH " + name + " " + pattern);
    if (!this.glob) {
        return name.equals(pattern);
    }
    if (pattern.equals("*")) {
        return true;
    }
    // pattern of the form "*...*": try again with the leading star removed,
    // then with the trailing star removed
    if (pattern.length() > 0 && pattern.charAt(0) == '*' && pattern.endsWith("*")) {
        return // avoid recursion deadlock
        ((matches(name, pattern.substring(1))) || (matches(name, pattern.substring(0, pattern.length() - 1))));
    }
    try {
        // split at the first '?': the parts before and after that position must match separately
        int i = pattern.indexOf('?',0);
        if (i >= 0) {
            if (!(matches(name.substring(0, i), pattern.substring(0, i)))) {
                return false;
            }
            return (matches(name.substring(i + 1), pattern.substring(i + 1)));
        }
        // first '*': the literal prefix in front of it must be equal
        i = pattern.indexOf('*',0);
        if (i >= 0) {
            if (!(name.substring(0, i).equals(pattern.substring(0, i)))) {
                return false;
            }
            if (pattern.length() == i + 1) {
                return true; // pattern would be '*'
            }
            // match what follows the star from the end: both remainders are reversed,
            // which turns the pattern's tail into a prefix followed by '*'
            return (matches(reverse(name.substring(i)), reverse(pattern.substring(i + 1)) + "*"));
        }
        return name.equals(pattern);
    } catch (final java.lang.StringIndexOutOfBoundsException e) {
        // this is normal. it's a lazy implementation
        return false;
    }
}
1963 |
1964 | private String reverse(final String s) {
1965 | if (s.length() < 2) {
1966 | return s;
1967 | }
1968 | return reverse(s.substring(1)) + s.charAt(0);
1969 | }
1970 |
// protocol socket commands
1972 |
1973 | private void send(final String buf) throws IOException {
1974 | if (this.clientOutput == null) return;
1975 | final byte[] b = buf.getBytes(StandardCharsets.UTF_8);
1976 | this.clientOutput.write(b, 0, b.length);
1977 | this.clientOutput.write('\r');
1978 | this.clientOutput.write('\n');
1979 | this.clientOutput.flush();
1980 | if (buf.startsWith("PASS")) {
1981 | Logger.info("> PASS ********");
1982 | } else {
1983 | Logger.info("> " + buf);
1984 | }
1985 | }
1986 |
1987 | private String receive() throws IOException {
1988 | // last reply starts with 3 digit number followed by space
1989 | String reply;
1990 |
1991 | while (true) {
1992 | if (this.clientInput == null) {
1993 | throw new IOException("Server has presumably shut down the connection.");
1994 | }
1995 | reply = this.clientInput.readLine();
1996 |
1997 | // sanity check
1998 | if (reply == null) {
1999 | throw new IOException("Server has presumably shut down the connection.");
2000 | }
2001 |
2002 | Logger.info("< " + reply);
2003 | // serverResponse.addElement(reply);
2004 |
2005 | if (reply.length() >= 4 && Character.isDigit(reply.charAt(0)) && Character.isDigit(reply.charAt(1))
2006 | && Character.isDigit(reply.charAt(2)) && (reply.charAt(3) == ' ')) {
2007 | break; // end of reply
2008 | }
2009 | }
2010 | // return last reply line
2011 | return reply;
2012 | }
2013 |
/**
 * Sends a TYPE command with the given type character and checks the reply.
 *
 * @param type
 *            type character appended to "TYPE "
 * @throws IOException
 *             with the reply as message if the reply is not a 2xx reply,
 *             or if the control connection fails
 */
private void sendTransferType(final char type) throws IOException {
    send("TYPE " + type);

    final String reply = receive();
    if (isNotPositiveCompletion(reply)) {
        throw new IOException(reply);
    }
}
2022 |
2023 | /**
2024 | * @return
2025 | * @throws IOException
2026 | */
2027 | private Socket getDataSocket() throws IOException {
2028 | Socket data;
2029 | if (isPassive()) {
2030 | if (this.DataSocketPassive == null) {
2031 | createDataSocket();
2032 | }
2033 | data = this.DataSocketPassive;
2034 | } else {
2035 | if (this.DataSocketActive == null) {
2036 | createDataSocket();
2037 | }
2038 | data = this.DataSocketActive.accept();
2039 | }
2040 | return data;
2041 | }
2042 |
2043 | /**
2044 | * create data channel
2045 | *
2046 | * @throws IOException
2047 | */
2048 | private void createDataSocket() throws IOException {
2049 | if (isPassive()) {
2050 | try {
2051 | createPassiveDataPort();
2052 | } catch (final IOException e) {
2053 | createActiveDataPort();
2054 | }
2055 | } else {
2056 | try {
2057 | createActiveDataPort();
2058 | } catch (final IOException e) {
2059 | createPassiveDataPort();
2060 | }
2061 | }
2062 | }
2063 |
/**
 * use passive ftp?
 *
 * @return the current value of the DataSocketPassiveMode flag
 */
private boolean isPassive() {
    return this.DataSocketPassiveMode;
}
2072 |
2073 | private void createActiveDataPort() throws IOException {
2074 | // create data socket and bind it to free port available
2075 | this.DataSocketActive = new ServerSocket(0);
2076 | this.DataSocketActive.setSoTimeout(getTimeout());
2077 | this.DataSocketActive.setReceiveBufferSize(1440); // read http://www.cisco.com/warp/public/105/38.shtml
2078 | applyDataSocketTimeout();
2079 |
2080 | // get port socket has been bound to
2081 | final int DataPort = this.DataSocketActive.getLocalPort();
2082 |
2083 | // client ip
2084 | // InetAddress LocalIp = serverCore.publicIP();
2085 | // InetAddress LocalIp =
2086 | // DataSocketActive.getInetAddress().getLocalHost();
2087 |
2088 | // save ip address in high byte order
2089 | // byte[] Bytes = LocalIp.getAddress();
2090 | final byte[] b = Domains.myPublicIPv4().iterator().next().getAddress();
2091 |
2092 | // bytes greater than 127 should not be printed as negative
2093 | final short[] s = new short[4];
2094 | for (int i = 0; i < 4; i++) {
2095 | s[i] = b[i];
2096 | if (s[i] < 0) {
2097 | s[i] += 256;
2098 | }
2099 | }
2100 |
2101 | // send port command via control socket:
2102 | // four ip address shorts encoded and two port shorts encoded
2103 | send("PORT "
2104 | +
2105 | // "127,0,0,1," +
2106 | s[0] + "," + s[1] + "," + s[2] + "," + s[3] + "," + ((DataPort & 0xff00) >> 8)
2107 | + "," + (DataPort & 0x00ff));
2108 |
2109 | // read status of the command from the control port
2110 | final String reply = receive();
2111 |
2112 | // check status code
2113 | if (isNotPositiveCompletion(reply)) {
2114 | throw new IOException(reply);
2115 | }
2116 |
2117 | this.DataSocketPassiveMode = false;
2118 | }
2119 |
2120 | private void createPassiveDataPort() throws IOException {
2121 | // send port command via control socket:
2122 | // four ip address shorts encoded and two port shorts encoded
2123 | send("PASV");
2124 |
2125 | // read status of the command from the control port
2126 | String reply = receive();
2127 |
2128 | // check status code
2129 | if (getStatusCode(reply) != 227) {
2130 | throw new IOException(reply);
2131 | }
2132 |
2133 | // parse the status return: address should start at the first number
2134 | int pos = 4;
2135 | while ((pos < reply.length()) && ((reply.charAt(pos) < '0') || (reply.charAt(pos) > '9'))) {
2136 | pos++;
2137 | }
2138 | if (pos >= reply.length()) {
2139 | throw new IOException(reply + " [could not parse return code]");
2140 | }
2141 | reply = reply.substring(pos);
2142 | pos = reply.length() - 1;
2143 | while ((pos >= 0) && ((reply.charAt(pos) < '0') || (reply.charAt(pos) > '9'))) {
2144 | pos--;
2145 | }
2146 | if (pos < 0) {
2147 | throw new IOException("[could not parse return code: no numbers]");
2148 | }
2149 | reply = reply.substring(0, pos + 1);
2150 | final StringTokenizer st = new StringTokenizer(reply, ",");
2151 | if (st.countTokens() != 6) {
2152 | throw new IOException("[could not parse return code: wrong number of numbers]");
2153 | }
2154 |
2155 | // set the data host and port
2156 | final int a = Integer.parseInt(st.nextToken());
2157 | final int b = Integer.parseInt(st.nextToken());
2158 | final int c = Integer.parseInt(st.nextToken());
2159 | final int d = Integer.parseInt(st.nextToken());
2160 | final InetAddress datahost = Domains.dnsResolve(a + "." + b + "." + c + "." + d);
2161 | final int high = Integer.parseInt(st.nextToken());
2162 | final int low = Integer.parseInt(st.nextToken());
2163 | if (high < 0 || high > 255 || low < 0 || low > 255) {
2164 | throw new IOException("[could not parse return code: syntax error]");
2165 | }
2166 | final int dataport = (high << 8) + low;
2167 |
2168 | this.DataSocketPassive = new Socket(datahost, dataport);
2169 | applyDataSocketTimeout();
2170 | this.DataSocketPassiveMode = true;
2171 | }
2172 |
2173 | /**
2174 | * closes data connection
2175 | *
2176 | * @throws IOException
2177 | */
2178 | private void closeDataSocket() throws IOException {
2179 | if (isPassive()) {
2180 | if (this.DataSocketPassive != null) {
2181 | this.DataSocketPassive.close();
2182 | this.DataSocketPassive = null;
2183 | }
2184 | } else {
2185 | if (this.DataSocketActive != null) {
2186 | this.DataSocketActive.close();
2187 | this.DataSocketActive = null;
2188 | }
2189 | }
2190 | }
2191 |
2192 | /**
2193 | * sets the timeout for the socket
2194 | *
2195 | * @throws SocketException
2196 | */
2197 | private void applyDataSocketTimeout() throws SocketException {
2198 | if (isPassive()) {
2199 | if (this.DataSocketPassive != null) {
2200 | this.DataSocketPassive.setSoTimeout(this.DataSocketTimeout * 1000);
2201 | }
2202 | } else {
2203 | if (this.DataSocketActive != null) {
2204 | this.DataSocketActive.setSoTimeout(this.DataSocketTimeout * 1000);
2205 | }
2206 | }
2207 | }
2208 |
2209 | private void get(final String fileDest, final String fileName) throws IOException {
2210 | // store time for statistics
2211 | final long start = System.currentTimeMillis();
2212 |
2213 | createDataSocket();
2214 |
2215 | // set type of the transfer
2216 | sendTransferType(transferType);
2217 |
2218 | // send command to the control port
2219 | send("RETR " + fileName);
2220 |
2221 | // read status of the command from the control port
2222 | final String reply = receive();
2223 |
2224 | // get status code
2225 | final int status = getStatus(reply);
2226 |
2227 | // starting data transaction
2228 | if (status == 1) {
2229 | Socket data = null;
2230 | InputStream ClientStream = null;
2231 | RandomAccessFile outFile = null;
2232 | int length = 0;
2233 | try {
2234 | data = getDataSocket();
2235 | ClientStream = data.getInputStream();
2236 |
2237 | // create local file
2238 | if (fileDest == null) {
2239 | outFile = new RandomAccessFile(fileName, "rw");
2240 | } else {
2241 | outFile = new RandomAccessFile(fileDest, "rw");
2242 | }
2243 |
2244 | // write remote file to local file
2245 | final byte[] block = new byte[blockSize];
2246 | int numRead;
2247 |
2248 | while ((numRead = ClientStream.read(block)) != -1) {
2249 | outFile.write(block, 0, numRead);
2250 | length = length + numRead;
2251 | }
2252 | } finally {
2253 | // shutdown connection
2254 | if(outFile != null) {
2255 | outFile.close();
2256 | }
2257 | if(ClientStream != null) {
2258 | ClientStream.close();
2259 | }
2260 | closeDataSocket();
2261 | }
2262 |
2263 | // after stream is empty we should get control completion echo
2264 | /*reply =*/ receive();
2265 | // boolean success = !isNotPositiveCompletion(reply);
2266 | // if (!success) throw new IOException(reply);
2267 |
2268 | // write statistics
2269 | final long stop = System.currentTimeMillis();
2270 | Logger.info(" ---- downloaded "
2271 | + ((length < 2048) ? length + " bytes" : (length / 1024) + " kbytes")
2272 | + " in "
2273 | + (((stop - start) < 2000) ? (stop - start) + " milliseconds"
2274 | : (((int) ((stop - start) / 100)) / 10) + " seconds"));
2275 | if (start == stop) {
2276 | Logger.warn("start == stop");
2277 | } else {
2278 | Logger.info(" (" + (length * 1000 / 1024 / (stop - start)) + " kbytes/second)");
2279 | }
2280 |
2281 | } else {
2282 | throw new IOException(reply);
2283 | }
2284 | }
2285 |
2286 |
2287 | public byte[] get(final String fileName) throws IOException {
2288 |
2289 | createDataSocket();
2290 |
2291 | // set type of the transfer
2292 | sendTransferType(transferType);
2293 |
2294 | // send command to the control port
2295 | send("RETR " + fileName);
2296 |
2297 | // read status of the command from the control port
2298 | final String reply = receive();
2299 |
2300 | // get status code
2301 | final int status = getStatus(reply);
2302 |
2303 | // starting data transaction
2304 | if (status == 1) {
2305 | Socket data = null;
2306 | InputStream ClientStream = null;
2307 | final ByteArrayOutputStream os = new ByteArrayOutputStream();
2308 | int length = 0;
2309 | try {
2310 | data = getDataSocket();
2311 | ClientStream = data.getInputStream();
2312 |
2313 | // write remote file to local file
2314 | final byte[] block = new byte[blockSize];
2315 | int numRead;
2316 |
2317 | while ((numRead = ClientStream.read(block)) != -1) {
2318 | os.write(block, 0, numRead);
2319 | length = length + numRead;
2320 | }
2321 | } finally {
2322 | // shutdown connection
2323 | if (ClientStream != null) {
2324 | ClientStream.close();
2325 | }
2326 | closeDataSocket();
2327 | }
2328 |
2329 | // after stream is empty we should get control completion echo
2330 | /*reply =*/ receive();
2331 | // boolean success = !isNotPositiveCompletion(reply);
2332 | return os.toByteArray();
2333 | }
2334 | throw new IOException(reply);
2335 | }
2336 |
2337 |
2338 | private void put(final String fileName, final String fileDest) throws IOException {
2339 |
2340 | createDataSocket();
2341 |
2342 | // set type of the transfer
2343 | sendTransferType(transferType);
2344 |
2345 | // send command to the control port
2346 | if (fileDest == null) {
2347 | send("STOR " + fileName);
2348 | } else {
2349 | send("STOR " + fileDest);
2350 | }
2351 |
2352 | // read status of the command from the control port
2353 | String reply = receive();
2354 |
2355 | // starting data transaction
2356 | if (getStatus(reply) == 1) {
2357 | final Socket data = getDataSocket();
2358 | final OutputStream ClientStream = data.getOutputStream();
2359 |
2360 | // read from local file
2361 | final RandomAccessFile inFile = new RandomAccessFile(fileName, "r");
2362 |
2363 | // write remote file to local file
2364 | final byte[] block = new byte[blockSize];
2365 | int numRead;
2366 |
2367 | while ((numRead = inFile.read(block)) >= 0) {
2368 | ClientStream.write(block, 0, numRead);
2369 | }
2370 |
2371 | // shutdown and cleanup
2372 | inFile.close();
2373 | ClientStream.close();
2374 |
2375 | // shutdown remote client connection
2376 | data.close();
2377 |
2378 | // after stream is empty we should get control completion echo
2379 | reply = receive();
2380 | final boolean success = (getStatus(reply) == 2);
2381 |
2382 | if (!success) {
2383 | throw new IOException(reply);
2384 | }
2385 |
2386 | } else {
2387 | throw new IOException(reply);
2388 | }
2389 | }
2390 |
2391 | /**
2392 | * Login to server
2393 | *
2394 | * @param account
2395 | * @param password
2396 | * @throws IOException
2397 | */
2398 | public void login(final String account, final String password) throws IOException {
2399 | unsetLoginData();
2400 |
2401 | // send user name
2402 | send("USER " + account);
2403 |
2404 | String reply = receive();
2405 | switch (getStatus(reply)) {
2406 | case 2:
2407 | // User logged in, proceed.
2408 | break;
2409 | case 5:// 530 Not logged in.
2410 | case 4:
2411 | case 1:// in RFC959 an error (page 57, diagram for the Login
2412 | // sequence)
2413 | throw new IOException(reply);
2414 | default:
2415 | // send password
2416 | send("PASS " + password);
2417 |
2418 | reply = receive();
2419 | if (isNotPositiveCompletion(reply)) {
2420 | throw new IOException(reply);
2421 | }
2422 | }
2423 | setLoginData(account, password, reply);
2424 | }
2425 |
2426 | /**
2427 | * we are authorized to use the server
2428 | *
2429 | * @return
2430 | */
2431 | public boolean isLoggedIn() {
2432 | return (this.account != null && this.password != null && this.remotegreeting != null);
2433 | }
2434 |
2435 | /**
2436 | * remember username and password which were used to login
2437 | *
2438 | * @param account
2439 | * @param password
2440 | * @param reply
2441 | * remoteGreeting
2442 | */
2443 | private void setLoginData(final String account, final String password, final String reply) {
2444 | this.account = account;
2445 | this.password = password;
2446 | this.remotegreeting = reply;
2447 | }
2448 |
2449 | private void unsetLoginData() {
2450 | this.account = null;
2451 | this.password = null;
2452 | this.remotegreeting = null;
2453 | }
2454 |
2455 | public void sys() throws IOException {
2456 | // send system command
2457 | send("SYST");
2458 |
2459 | // check completion
2460 | final String systemType = receive();
2461 | if (isNotPositiveCompletion(systemType)) {
2462 | throw new IOException(systemType);
2463 | }
2464 |
2465 | // exclude status code from reply
2466 | this.remotesystem = systemType.substring(4);
2467 | }
2468 |
2469 | private void literal(final String commandLine) throws IOException {
2470 | // send the complete line
2471 | send(commandLine);
2472 |
2473 | // read reply
2474 | final String reply = receive();
2475 |
2476 | if (getStatus(reply) == 5) {
2477 | throw new IOException(reply);
2478 | }
2479 | }
2480 |
2481 | /**
2482 | * control socket timeout
2483 | *
2484 | * @return
2485 | */
2486 | public int getTimeout() {
2487 | return ControlSocketTimeout;
2488 | }
2489 |
2490 | /**
2491 | * after this time the data connection is closed
2492 | *
2493 | * @param timeout
2494 | * in seconds, 0 = infinite
2495 | */
2496 | public void setDataSocketTimeout(final int timeout) {
2497 | this.DataSocketTimeout = timeout;
2498 |
2499 | try {
2500 | applyDataSocketTimeout();
2501 | } catch (final SocketException e) {
2502 | Logger.warn("setDataSocketTimeout: " + e.getMessage());
2503 | }
2504 | }
2505 |
2506 | public static List dir(final String host, final String remotePath, final String account,
2507 | final String password, final boolean extended) {
2508 | try {
2509 | final FTPClient c = new FTPClient();
2510 | c.cmd = new String[] { "open", host };
2511 | c.OPEN();
2512 | c.cmd = new String[] { "user", account, password };
2513 | c.USER();
2514 | c.cmd = new String[] { "ls" };
2515 | final List v = c.list(remotePath, extended);
2516 | c.cmd = new String[] { "close" };
2517 | c.CLOSE();
2518 | c.cmd = new String[] { "exit" };
2519 | c.EXIT();
2520 | return v;
2521 | } catch (final RuntimeException e) {
2522 | return null;
2523 | } catch (final IOException e) {
2524 | return null;
2525 | }
2526 | }
2527 |
2528 | private static void dir(final String host, final String remotePath, final String account, final String password) {
2529 | try {
2530 | final FTPClient c = new FTPClient();
2531 | c.exec("open " + host, false);
2532 | c.exec("user " + account + " " + password, false);
2533 | c.exec("cd " + remotePath, false);
2534 | c.exec("ls", true);
2535 | c.exec("close", false);
2536 | c.exec("exit", false);
2537 | } catch (final RuntimeException e) {
2538 | }
2539 | }
2540 |
2541 | /**
2542 | * Asynchronously generate a list of all files on a ftp server using the anonymous account.
2543 | * @param host host name or address
2544 | * @param port ftp port
2545 | * @param user user name
2546 | * @param pw user password
2547 | * @param path path on the ftp site
2548 | * @param depth the maximum depth of the sub folders exploration.
2549 | * @return a queue asynchronously filled with entryInfo from all files of the ftp server
2550 | * @throws IOException when a error occurred
2551 | */
2552 | public static BlockingQueue sitelist(final String host, final int port, final String user, final String pw, final String path, final int depth) throws IOException {
2553 | final FTPClient ftpClient = new FTPClient();
2554 | ftpClient.open(host, port);
2555 | ftpClient.login(user, pw);
2556 | final LinkedBlockingQueue queue = new LinkedBlockingQueue();
2557 | new Thread() {
2558 | @Override
2559 | public void run() {
2560 | try {
2561 | Thread.currentThread().setName("FTP.sitelist(" + host + ":" + port + ")");
2562 | sitelist(ftpClient, path, queue, depth);
2563 | ftpClient.quit();
2564 | } catch (final Exception e) {} finally {
2565 | queue.add(POISON_entryInfo);
2566 | }
2567 | }
2568 | }.start();
2569 | return queue;
2570 | }
2571 |
2572 | /**
2573 | * Feed the queue with files under a given path on a ftp server using
2574 | * the anonymous account. When path is a file path, only one entry is added
2575 | * to the queue.
2576 | *
2577 | * @param ftpClient
2578 | * fptClient initialized with a host and login information
2579 | * @param path
2580 | * path on the host
2581 | * @param queue
2582 | * the entries queue to feed
2583 | * @param depth
2584 | * the maximum depth of the sub folders exploration.
2585 | * @throws IOException
2586 | * when a error occurred
2587 | */
2588 | private static void sitelist(final FTPClient ftpClient, String path, final LinkedBlockingQueue queue, final int depth) {
2589 | List list;
2590 | try {
2591 | list = ftpClient.list(path, true);
2592 | } catch (final IOException e) {
2593 | /* path might be a file path */
2594 | if (!path.endsWith("/")) {
2595 | entryInfo info = ftpClient.fileInfo(path);
2596 | if (info != null) {
2597 | queue.add(info);
2598 | } else {
2599 | /* We could not get file information, but this doesn't mean the file does not exist :
2600 | * we add it anyway to the queue */
2601 | info = new entryInfo();
2602 | info.name = path;
2603 | queue.add(info);
2604 | }
2605 | } else {
2606 | Logger.warn("cannot make sitelist", e);
2607 | }
2608 | return;
2609 | }
2610 | if (!path.endsWith("/")) path += "/";
2611 | entryInfo info;
2612 | // first find all files and add them to the crawl list
2613 | for (final String line : list) {
2614 | info = parseListData(line);
2615 | if (info != null && info.type == filetype.file && !info.name.endsWith(".") && !info.name.startsWith(".")) {
2616 | if (!info.name.startsWith("/")) info.name = path + info.name;
2617 | queue.add(info);
2618 | }
2619 | }
2620 | // then find all directories and add them recursively if depth is over zero
2621 | if(depth > 0) {
2622 | for (final String line : list) {
2623 | //System.out.println("LIST:" + line);
2624 | info = parseListData(line);
2625 | if (info != null && !info.name.endsWith(".") && !info.name.startsWith(".")) {
2626 | if (info.type == filetype.directory) {
2627 | sitelist(ftpClient, path + info.name, queue, depth - 1);
2628 | } else if (info.type == filetype.link) {
2629 | final int q = info.name.indexOf("->",0);
2630 | if (q >= 0 && info.name.indexOf("..", q) < 0) {
2631 | //System.out.println("*** LINK:" + line);
2632 | info.name = info.name.substring(0, q).trim();
2633 | sitelist(ftpClient, path + info.name, queue, depth - 1);
2634 | }
2635 |
2636 | }
2637 | }
2638 | }
2639 | }
2640 | }
2641 |
2642 | public StringBuilder dirhtml(String remotePath) throws IOException {
2643 | // returns a directory listing using an existing connection
2644 | if (isFolder(remotePath) && '/' != remotePath.charAt(remotePath.length()-1)) {
2645 | remotePath += '/';
2646 | }
2647 | final String pwd = pwd();
2648 | final List list = list(remotePath, true);
2649 | if (this.remotesystem == null) try {sys();} catch (final IOException e) {}
2650 | final String base = "ftp://" + ((this.account.equals(ANONYMOUS)) ? "" : (this.account + ":" + this.password + "@"))
2651 | + this.host + ((this.port == 21) ? "" : (":" + this.port)) + ((remotePath.length() > 0 && remotePath.charAt(0) == '/') ? "" : pwd + "/")
2652 | + remotePath;
2653 |
2654 | return dirhtml(base, this.remotemessage, this.remotegreeting, this.remotesystem, list, true);
2655 | }
2656 |
2657 | private static StringBuilder dirhtml(
2658 | final String host, final int port, final String remotePath,
2659 | final String account, final String password) throws IOException {
2660 | // opens a new connection and returns a directory listing as html
2661 | final FTPClient c = new FTPClient();
2662 | c.open(host, port);
2663 | c.login(account, password);
2664 | c.sys();
2665 | final StringBuilder page = c.dirhtml(remotePath);
2666 | c.quit();
2667 | return page;
2668 | }
2669 |
/**
 * Builds the page for a directory listing from already collected strings.
 *
 * The page is titled with the text Index of followed by base. When both servermessage and
 * greeting are non-null they are written near the top. Every listing line is run through
 * parseListData: for a parsed line the text before the entry name, the name itself and
 * the text after it are appended; an unparsed line starting with http://, ftp://, smb://
 * or file:// is appended through a separate branch; any other line is appended raw.
 * When system is non-null a system info line closes the page.
 *
 * NOTE(review): several string literals in this copy are empty or split across lines;
 * presumably they carried HTML markup that was lost - restore from the authoritative
 * source before changing this method.
 * NOTE(review): nothing here escapes the listing lines, greeting or system text before
 * they are appended - verify how callers treat the result.
 *
 * @param base
 *            text used in the title (the instance method passes an ftp:// url)
 * @param servermessage
 *            server name or message, may be null
 * @param greeting
 *            server greeting, may be null
 * @param system
 *            system description, may be null
 * @param list
 *            the raw listing lines
 * @param metaRobotNoindex
 *            when true one additional line is appended to the head part of the page
 * @return the assembled page
 */
2670 | public static StringBuilder dirhtml(
2671 | final String base, final String servermessage, final String greeting,
2672 | final String system, final List list,
2673 | final boolean metaRobotNoindex) {
2674 | // this creates the html output from collected strings
2675 | final StringBuilder page = new StringBuilder(1024);
2676 | final String title = "Index of " + base;
2677 |
2678 | page.append("\n");
2679 | page.append("\n");
2680 | page.append(" ").append(title).append("\n");
2681 | page.append(" \n");
2682 | if (metaRobotNoindex) {
2683 | page.append(" \n");
2684 | }
2685 | page.append(" \n");
2686 | page.append("\n");
2687 | page.append(" ").append(title).append("
\n");
2688 | if (servermessage != null && greeting != null) {
2689 | page.append(" Server \"").append(servermessage).append("\" responded:\n");
2690 | page.append(" \n");
2691 | page.append(greeting);
2692 | page.append("\n");
2693 | page.append("
\n");
2694 | }
2695 | page.append("
\n");
2696 | page.append(" \n");
// nameStart and nameEnd delimit the entry name inside the raw listing line
2697 | int nameStart, nameEnd;
2698 | entryInfo info;
2699 | for (final String line : list) {
2700 | info = parseListData(line);
2701 | if (info != null) {
2702 | // with link
// NOTE(review): indexOf returns -1 when the name is not a substring of the line, which would make the substring call below throw - confirm this cannot happen
2703 | nameStart = line.indexOf(info.name);
2704 | page.append(line.substring(0, nameStart));
2705 | page.append("").append(info.name).append("");
2706 | nameEnd = nameStart + info.name.length();
2707 | if (line.length() > nameEnd) {
2708 | page.append(line.substring(nameEnd));
2709 | }
2710 | } else if (line.startsWith("http://") || line.startsWith("ftp://") || line.startsWith("smb://") || line.startsWith("file://")) {
2711 | page.append("").append(line).append("");
2712 | } else {
2713 | // raw
2714 | page.append(line);
2715 | }
2716 | page.append('\n');
2717 | }
2718 | page.append(" \n");
2719 | page.append("
\n");
2720 | if (system != null) page.append(" System info: \"").append(system).append("\"\n");
2721 | page.append("\n");
2722 |
2723 | return page;
2724 | }
2725 |
2726 | public static String put(final String host, File localFile, String remotePath, final String remoteName,
2727 | final String account, final String password) throws IOException {
2728 | // returns the log
2729 | try {
2730 | final ByteArrayOutputStream bout = new ByteArrayOutputStream();
2731 | final PrintStream out = new PrintStream(bout);
2732 |
2733 | final ByteArrayOutputStream berr = new ByteArrayOutputStream();
2734 | final PrintStream err = new PrintStream(berr);
2735 |
2736 | final FTPClient c = new FTPClient();
2737 | c.exec("open " + host, false);
2738 | c.exec("user " + account + " " + password, false);
2739 | if (remotePath != null) {
2740 | remotePath = remotePath.replace('\\', '/');
2741 | c.exec("cd " + remotePath, false);
2742 | }
2743 | c.exec("binary", false);
2744 | if (localFile.isAbsolute()) {
2745 | c.exec("lcd \"" + localFile.getParent() + "\"", false);
2746 | localFile = new File(localFile.getName());
2747 | }
2748 | c.exec("put " + localFile.toString() + ((remoteName.isEmpty()) ? "" : (" " + remoteName)), false);
2749 | c.exec("close", false);
2750 | c.exec("exit", false);
2751 |
2752 | out.close();
2753 | err.close();
2754 |
2755 | final String outLog = bout.toString();
2756 | bout.close();
2757 |
2758 | final String errLog = berr.toString();
2759 | berr.close();
2760 |
2761 | if (errLog.length() > 0) {
2762 | throw new IOException("Ftp put failed:\n" + errLog);
2763 | }
2764 |
2765 | return outLog;
2766 | } catch (final IOException e) {
2767 | throw e;
2768 | }
2769 | }
2770 |
2771 | public static void get(final String host, String remoteFile, final File localPath, final String account, final String password) {
2772 | try {
2773 | final FTPClient c = new FTPClient();
2774 | if (remoteFile.isEmpty()) {
2775 | remoteFile = "/";
2776 | }
2777 | c.exec("open " + host, false);
2778 | c.exec("user " + account + " " + password, false);
2779 | c.exec("lcd " + localPath.getAbsolutePath(), false);
2780 | c.exec("binary", false);
2781 | c.exec("get " + remoteFile + " " + localPath.getAbsoluteFile().toString(), false);
2782 | c.exec("close", false);
2783 | c.exec("exit", false);
2784 | } catch (final RuntimeException e) {
2785 | }
2786 | }
2787 |
2788 | public static void getAnonymous(final String host, final String remoteFile, final File localPath) {
2789 | get(host, remoteFile, localPath, ANONYMOUS, "anomic");
2790 | }
2791 |
2792 | /**
2793 | * class that puts a file on a ftp-server can be used as a thread
2794 | */
2795 | static class pt implements Runnable {
2796 | String host;
2797 | File localFile;
2798 | String remotePath;
2799 | String remoteName;
2800 | String account;
2801 | String password;
2802 |
2803 | public pt(final String h, final File l, final String rp, final String rn, final String a, final String p) {
2804 | this.host = h;
2805 | this.localFile = l;
2806 | this.remotePath = rp;
2807 | this.remoteName = rn;
2808 | this.account = a;
2809 | this.password = p;
2810 | }
2811 |
2812 | @Override
2813 | public final void run() {
2814 | try {
2815 | Thread.currentThread().setName("FTP.pt(" + this.host + ")");
2816 | put(this.host, this.localFile, this.remotePath, this.remoteName, this.account, this.password);
2817 | } catch (final IOException e) {
2818 | Logger.warn(e.getMessage(), e);
2819 | }
2820 | }
2821 | }
2822 |
2823 | public static Thread putAsync(final String host, final File localFile, final String remotePath,
2824 | final String remoteName, final String account, final String password) {
2825 | final Thread t = new Thread(new pt(host, localFile, remotePath, remoteName, account, password), "ftp to " + host);
2826 | t.start();
2827 | return t; // return value can be used to determine status of transfer
2828 | // with isAlive() or join()
2829 | }
2830 |
2831 | private static void printHelp() {
2832 | System.out.println("FTPClient help");
2833 | System.out.println("----------");
2834 | System.out.println();
2835 | System.out.println("The following commands are supported");
2836 | System.out.println("java net.yacy.cora.protocol.ftp.FTPClient -h -- prints this help");
2837 | System.out.println("java net.yacy.cora.protocol.ftp.FTPClient -dir [':'] [ ]");
2838 | System.out.println("java net.yacy.cora.protocol.ftp.FTPClient -htmldir ");
2839 | System.out.println("java net.yacy.cora.protocol.ftp.FTPClient -get [':'] [ ]");
2840 | System.out.println("java net.yacy.cora.protocol.ftp.FTPClient -put [':'] ");
2841 | System.out.println("java net.yacy.cora.protocol.ftp.FTPClient -sitelist ");
2842 | System.out.println();
2843 | }
2844 |
2845 | public static void main(final String[] args) {
2846 | try {
2847 | System.out.println("WELCOME TO THE ANOMIC FTP CLIENT v" + vDATE);
2848 | System.out.println("Visit http://www.anomic.de and support shareware!");
2849 | System.out.println("try -h for command line options");
2850 | System.out.println();
2851 | if (args.length == 1) {
2852 | if (args[0].equals("-h")) {
2853 | printHelp();
2854 | }
2855 | } else if (args.length == 2) {
2856 | printHelp();
2857 | } else if (args.length == 3) {
2858 | if (args[0].equals("-dir")) {
2859 | dir(args[1], args[2], ANONYMOUS, "anomic@");
2860 | } else if (args[0].equals("-htmldir")) {
2861 | final File file = new File("dirindex.html");
2862 | try (FileOutputStream fos = new FileOutputStream(file);) {
2863 | final StringBuilder page = dirhtml(args[1], 21, args[2], ANONYMOUS, "anomic@");
2864 | fos.write(page.toString().getBytes(StandardCharsets.UTF_8));
2865 | } catch (final FileNotFoundException e) {
2866 | Logger.warn("", e);
2867 | } catch (final IOException e) {
2868 | Logger.warn("", e);
2869 | }
2870 | } else {
2871 | printHelp();
2872 | }
2873 | } else if (args.length == 4) {
2874 | if (args[0].equals("-get")) {
2875 | getAnonymous(args[1], args[2], new File(args[3]));
2876 | } else if (args[0].equals("-sitelist")) {
2877 | try {
2878 | final BlockingQueue q = sitelist(args[1], Integer.parseInt(args[2]), ANONYMOUS, "anomic", "/", Integer.parseInt(args[3]));
2879 | entryInfo entry;
2880 | while ((entry = q.take()) != FTPClient.POISON_entryInfo) {
2881 | System.out.println(entry.toString());
2882 | }
2883 | } catch (final FileNotFoundException e) {
2884 | Logger.warn("", e);
2885 | } catch (final IOException e) {
2886 | Logger.warn("", e);
2887 | } catch (final InterruptedException e) {
2888 | Logger.warn("", e);
2889 | }
2890 | } else {
2891 | printHelp();
2892 | }
2893 | } else if (args.length == 5) {
2894 | if (args[0].equals("-dir")) {
2895 | dir(args[1], args[2], args[3], args[4]);
2896 | } else {
2897 | printHelp();
2898 | }
2899 | } else if (args.length == 6) {
2900 | if (args[0].equals("-get")) {
2901 | get(args[1], args[2], new File(args[3]), args[4], args[5]);
2902 | } else if (args[0].equals("-put")) {
2903 | try {
2904 | put(args[1], new File(args[2]), args[3], "", args[4], args[5]);
2905 | } catch (final IOException e) {
2906 | Logger.warn(e.getMessage(), e);
2907 | }
2908 | } else {
2909 | printHelp();
2910 | }
2911 | } else {
2912 | printHelp();
2913 | }
2914 | } catch (final Exception e) {
2915 |
2916 | }
2917 | }
2918 | }
2919 |
--------------------------------------------------------------------------------