├── .ci └── notify_github.sh ├── .gitattributes ├── .github └── workflows │ ├── gradle.yml │ └── release.yml ├── .gitignore ├── .gitlab-ci.yml ├── Dockerfile ├── LICENSE.md ├── README.md ├── build.gradle ├── dist ├── MServer.sh ├── STARTEN__MServer.sh ├── STOPPEN__MServer.sh ├── bannedFilmList.txt ├── live-streams.json ├── mserver.xml └── upload.xml ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── jdk8.sh ├── settings.gradle └── src ├── main ├── java │ └── mServer │ │ ├── MServer.java │ │ ├── Main.java │ │ ├── crawler │ │ ├── AddToFilmlist.java │ │ ├── BannedFilmFilter.java │ │ ├── Crawler.java │ │ ├── CrawlerConfig.java │ │ ├── CrawlerTool.java │ │ ├── FilmeSuchen.java │ │ ├── GetUrl.java │ │ ├── ListeRunSender.java │ │ ├── RunSender.java │ │ ├── gui │ │ │ ├── Data.java │ │ │ ├── MSG.java │ │ │ ├── MSearchGuiController.java │ │ │ ├── MSearchGuiLoad.java │ │ │ ├── MSearchLoad.java │ │ │ ├── PanelDelController.java │ │ │ ├── PanelSearchController.java │ │ │ └── PanelToolController.java │ │ └── sender │ │ │ ├── ConfigReader.java │ │ │ ├── CrawlerConfig.java │ │ │ ├── Film.java │ │ │ ├── MediathekBr.java │ │ │ ├── MediathekCrawler.java │ │ │ ├── MediathekOrf.java │ │ │ ├── MediathekReader.java │ │ │ ├── MediathekSrfPod.java │ │ │ ├── ard │ │ │ ├── ArdConstants.java │ │ │ ├── ArdCrawler.java │ │ │ ├── ArdFilmDto.java │ │ │ ├── ArdFilmInfoDto.java │ │ │ ├── ArdFilmUrlInfoDto.java │ │ │ ├── ArdTopicInfoDto.java │ │ │ ├── ArdUrlOptimizer.java │ │ │ ├── ArdVideoDTO.java │ │ │ ├── PaginationUrlDto.java │ │ │ ├── json │ │ │ │ ├── ArdDayPageDeserializer.java │ │ │ │ ├── ArdErrorDeserializer.java │ │ │ │ ├── ArdErrorInfoDto.java │ │ │ │ ├── ArdFilmDeserializer.java │ │ │ │ ├── ArdMediaArrayToDownloadUrlsConverter.java │ │ │ │ ├── ArdTeasersDeserializer.java │ │ │ │ ├── ArdTopicPageDeserializer.java │ │ │ │ ├── ArdTopicsDeserializer.java │ │ │ │ ├── ArdTopicsLetterDeserializer.java │ │ │ │ ├── ArdVideoInfoDto.java │ 
│ │ │ └── ArdVideoInfoJsonDeserializer.java │ │ │ └── tasks │ │ │ │ ├── ArdDayPageTask.java │ │ │ │ ├── ArdFilmDetailTask.java │ │ │ │ ├── ArdTaskBase.java │ │ │ │ ├── ArdTopicPageTask.java │ │ │ │ ├── ArdTopicsLetterTask.java │ │ │ │ └── ArdTopicsTask.java │ │ │ ├── arte │ │ │ ├── ArteCategoryFilmListDeserializer.java │ │ │ ├── ArteCategoryFilmsDTO.java │ │ │ ├── ArteCollectionChildDeserializer.java │ │ │ ├── ArteCollectionParentDeserializer.java │ │ │ ├── ArteDatenFilmDeserializer.java │ │ │ ├── ArteHttpClient.java │ │ │ ├── ArteJsonObjectToDatenFilmCallable.java │ │ │ ├── ArteListBaseDeserializer.java │ │ │ ├── ArteProgramIdToDatenFilmCallable.java │ │ │ ├── ArteSubPageDeserializer.java │ │ │ ├── ArteVideoDTO.java │ │ │ ├── ArteVideoDeserializer.java │ │ │ ├── ArteVideoDetailsDTO.java │ │ │ ├── ArteVideoDetailsDeserializer.java │ │ │ ├── ArteVideoType.java │ │ │ ├── ArteVideoTypeMapper.java │ │ │ └── MediathekArte.java │ │ │ ├── base │ │ │ ├── AbstractDocumentTask.java │ │ │ ├── AbstractJsonRestTask.java │ │ │ ├── AbstractRecursivConverterTask.java │ │ │ ├── AbstractRestTask.java │ │ │ ├── AbstractUrlTask.java │ │ │ ├── CrawlerUrlDTO.java │ │ │ ├── DateUtils.java │ │ │ ├── FilmUrlInfoDto.java │ │ │ ├── GeoLocations.java │ │ │ ├── HtmlDocumentUtils.java │ │ │ ├── JsonUtils.java │ │ │ ├── JsoupConnection.java │ │ │ ├── M3U8Constants.java │ │ │ ├── M3U8Dto.java │ │ │ ├── M3U8Parser.java │ │ │ ├── PagedElementListDTO.java │ │ │ ├── Qualities.java │ │ │ ├── SendungOverviewDto.java │ │ │ ├── TopicUrlDTO.java │ │ │ ├── UrlParseException.java │ │ │ └── UrlUtils.java │ │ │ ├── dreisat │ │ │ ├── DreiSatCrawler.java │ │ │ ├── DreisatConstants.java │ │ │ ├── DreisatDayPageHtmlDeserializer.java │ │ │ └── DreisatDayPageHtmlTask.java │ │ │ ├── dw │ │ │ ├── DWTaskBase.java │ │ │ ├── DwConstants.java │ │ │ ├── DwCrawler.java │ │ │ ├── DwFilmDetailDto.java │ │ │ ├── DwVideoDto.java │ │ │ ├── parser │ │ │ │ ├── DWSendungOverviewDeserializer.java │ │ │ │ └── 
DwFilmDetailDeserializer.java │ │ │ └── tasks │ │ │ │ ├── DWOverviewTask.java │ │ │ │ └── DwFilmDetailTask.java │ │ │ ├── funk │ │ │ ├── ApiUrlBuilder.java │ │ │ ├── CrawlerUrlType.java │ │ │ ├── FilmInfoDto.java │ │ │ ├── FunkApiUrls.java │ │ │ ├── FunkChannelDTO.java │ │ │ ├── FunkCrawler.java │ │ │ ├── FunkUrls.java │ │ │ ├── json │ │ │ │ ├── AbstractFunkElementDeserializer.java │ │ │ │ ├── FunkChannelDeserializer.java │ │ │ │ ├── FunkVideoDeserializer.java │ │ │ │ ├── NexxCloudSessionInitDeserializer.java │ │ │ │ ├── NexxCloudVideoDetailsDeserializer.java │ │ │ │ └── NexxResolutionDTO.java │ │ │ └── tasks │ │ │ │ ├── FunkChannelsRestTask.java │ │ │ │ ├── FunkRestEndpoint.java │ │ │ │ ├── FunkRestTask.java │ │ │ │ ├── FunkVideosToFilmsTask.java │ │ │ │ └── NexxCloudSessionInitiationTask.java │ │ │ ├── kika │ │ │ ├── KikaApiConstants.java │ │ │ ├── KikaApiCrawler.java │ │ │ ├── KikaApiFilmDto.java │ │ │ ├── KikaApiTopicDto.java │ │ │ ├── KikaApiVideoInfoDto.java │ │ │ ├── Resolution.java │ │ │ ├── json │ │ │ │ ├── KikaApiTopicPageDeserializer.java │ │ │ │ └── KikaApiVideoInfoPageDeserializer.java │ │ │ └── tasks │ │ │ │ ├── KikaApiFilmTask.java │ │ │ │ └── KikaApiTopicTask.java │ │ │ ├── orfon │ │ │ ├── OrfHttpClient.java │ │ │ ├── OrfOnBreadCrumsUrlDTO.java │ │ │ ├── OrfOnConstants.java │ │ │ ├── OrfOnCrawler.java │ │ │ ├── OrfOnVideoInfoDTO.java │ │ │ ├── json │ │ │ │ ├── OrfOnAZDeserializer.java │ │ │ │ ├── OrfOnEpisodeDeserializer.java │ │ │ │ ├── OrfOnEpisodesDeserializer.java │ │ │ │ ├── OrfOnHistoryChildrenDeserializer.java │ │ │ │ ├── OrfOnHistoryDeserializer.java │ │ │ │ ├── OrfOnHistoryVideoItemDeserializer.java │ │ │ │ └── OrfOnScheduleDeserializer.java │ │ │ └── task │ │ │ │ ├── OrfOnAZTask.java │ │ │ │ ├── OrfOnEpisodeTask.java │ │ │ │ ├── OrfOnEpisodesTask.java │ │ │ │ ├── OrfOnHistoryChildrenTask.java │ │ │ │ ├── OrfOnHistoryTask.java │ │ │ │ ├── OrfOnHistoryVideoItemTask.java │ │ │ │ ├── OrfOnPagedTask.java │ │ │ │ └── OrfOnScheduleTask.java │ │ │ 
├── phoenix │ │ │ ├── PhoenixConstants.java │ │ │ ├── PhoenixCrawler.java │ │ │ ├── parser │ │ │ │ ├── PhoenixFilmDetailDeserializer.java │ │ │ │ ├── PhoenixFilmDetailDto.java │ │ │ │ ├── PhoenixFilmXmlHandler.java │ │ │ │ └── PhoenixSendungOverviewDeserializer.java │ │ │ └── tasks │ │ │ │ ├── PhoenixFilmDetailTask.java │ │ │ │ └── PhoenixOverviewTask.java │ │ │ ├── sr │ │ │ ├── SrConstants.java │ │ │ ├── SrCrawler.java │ │ │ ├── SrTopicUrlDTO.java │ │ │ └── tasks │ │ │ │ ├── SrFilmDetailTask.java │ │ │ │ ├── SrRateLimitedDocumentTask.java │ │ │ │ ├── SrTopicArchivePageTask.java │ │ │ │ └── SrTopicsOverviewPageTask.java │ │ │ ├── srf │ │ │ ├── SrfConstants.java │ │ │ ├── SrfCrawler.java │ │ │ ├── parser │ │ │ │ ├── SrfFilmJsonDeserializer.java │ │ │ │ ├── SrfTopicDeserializer.java │ │ │ │ └── SrfTopicsDeserializer.java │ │ │ └── tasks │ │ │ │ ├── SrfFilmDetailTask.java │ │ │ │ ├── SrfTopicOverviewTask.java │ │ │ │ └── SrfTopicsOverviewTask.java │ │ │ └── zdf │ │ │ ├── AbstractZdfCrawler.java │ │ │ ├── DownloadDtoFilmConverter.java │ │ │ ├── ZdfConfiguration.java │ │ │ ├── ZdfConstants.java │ │ │ ├── ZdfCrawler.java │ │ │ ├── ZdfDatenFilm.java │ │ │ ├── ZdfFilmDto.java │ │ │ ├── ZdfVideoUrlOptimizer.java │ │ │ ├── json │ │ │ ├── DownloadDto.java │ │ │ ├── ZdfDayPageDeserializer.java │ │ │ ├── ZdfDayPageDto.java │ │ │ ├── ZdfDownloadDtoDeserializer.java │ │ │ └── ZdfFilmDetailDeserializer.java │ │ │ ├── parser │ │ │ ├── ZdfDayPageHtmlDeserializer.java │ │ │ ├── ZdfLetterListHtmlDeserializer.java │ │ │ ├── ZdfTopicPageHtmlDeserializer.java │ │ │ └── ZdfTopicsPageHtmlDeserializer.java │ │ │ └── tasks │ │ │ ├── ZdfDayPageHtmlTask.java │ │ │ ├── ZdfDayPageTask.java │ │ │ ├── ZdfFilmDetailTask.java │ │ │ ├── ZdfIndexPageTask.java │ │ │ ├── ZdfLetterListHtmlTask.java │ │ │ ├── ZdfTaskBase.java │ │ │ ├── ZdfTopicPageHtmlTask.java │ │ │ └── ZdfTopicsPageHtmlTask.java │ │ ├── daten │ │ ├── MserverDatenUpload.java │ │ ├── MserverListeSuchen.java │ │ ├── MserverListeUpload.java 
│ │ └── MserverSearchTask.java │ │ ├── search │ │ └── MserverSearch.java │ │ ├── tool │ │ ├── DateWithoutTimeComparer.java │ │ ├── EnvManager.java │ │ ├── HashFileWriter.java │ │ ├── M3U8Utils.java │ │ ├── MserverDaten.java │ │ ├── MserverDatumZeit.java │ │ ├── MserverKonstanten.java │ │ ├── MserverLog.java │ │ ├── MserverTimer.java │ │ ├── MserverWarten.java │ │ ├── MserverXmlLesen.java │ │ ├── MserverXmlSchreiben.java │ │ └── StatsUpload.java │ │ └── upload │ │ ├── MserverCopy.java │ │ ├── MserverFtp.java │ │ └── MserverUpload.java └── resources │ ├── MServer-LICENSE.txt │ ├── log4j2.xml │ └── mServer │ └── crawler │ └── gui │ ├── MSearchGui.fxml │ ├── PanelDel.fxml │ ├── PanelSearch.fxml │ ├── PanelTool.fxml │ ├── msearchgui.css │ ├── paneldel.css │ ├── panelsearch.css │ └── paneltool.css └── test ├── developTest ├── java │ └── mServer │ │ ├── crawler │ │ ├── AddToFilmlistTest.java │ │ └── sender │ │ │ └── arte │ │ │ ├── ArteCategoryFilmListDeserializerTest.java │ │ │ ├── ArteSubPageDeserializerTest.java │ │ │ └── ArteVideoDetailsDeserializerTest.java │ │ ├── test │ │ ├── JsonFileReader.java │ │ └── TestFileReader.java │ │ └── tool │ │ ├── HashFileWriterTest.java │ │ └── M3U8UtilsTest.java └── resources │ └── arte │ ├── arte_category.json │ ├── arte_static_content.json │ ├── arte_video_details_first_several_minors_geo_defr.json │ ├── arte_video_details_first_with_catchuprights_past_geo_sat.json │ ├── arte_video_details_first_without_catchuprights_geo_eudefr.json │ ├── arte_video_details_major_with_catchuprights_past.json │ ├── arte_video_details_no_broadcastprogrammings_geo_all.json │ ├── arte_video_details_no_broadcastprogrammings_nocatchuprights.json │ ├── arte_video_details_several_majors_minors_geo_null.json │ ├── arte_video_list1.json │ ├── arte_video_list2.json │ └── arte_video_list_last.json └── performanceTest ├── java └── PerformanceTest.java └── resources └── mserver.xml /.ci/notify_github.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Installiere curl 4 | apt-get update -qq && apt-get install -y -qq curl > /dev/null 5 | 6 | GITHUB_ORG="mediathekview" 7 | GITHUB_REPO="MServer" 8 | GITHUB_SHA="${CI_COMMIT_SHA}" 9 | cat << EOF > headers.curl 10 | Accept: application/vnd.github+json 11 | Authorization: token ${GITHUB_API_TOKEN} 12 | EOF 13 | cat << EOF > success.json 14 | { 15 | "state" : "success", 16 | "target_url" : "${CI_PIPELINE_URL}", 17 | "description" : "CI runs at ElaonDE systems successful" 18 | } 19 | EOF 20 | cat << EOF > failure.json 21 | { 22 | "state" : "failure", 23 | "target_url" : "${CI_PIPELINE_URL}", 24 | "description" : "CI runs at ElaonDE systems failed" 25 | } 26 | EOF 27 | cat << EOF > pending.json 28 | { 29 | "state" : "pending", 30 | "target_url" : "${CI_PIPELINE_URL}", 31 | "description" : "CI runs at ElaonDE systems pending" 32 | } 33 | EOF 34 | GITHUB_API_URL="https://api.github.com/repos/${GITHUB_ORG}/${GITHUB_REPO}/statuses/${GITHUB_SHA}" 35 | if [ "$1" == "success" ]; then 36 | curl -s -X POST -H @headers.curl "${GITHUB_API_URL}" -d @success.json 37 | elif [ "$1" == "failure" ]; then 38 | curl -s -X POST -H @headers.curl "${GITHUB_API_URL}" -d @failure.json 39 | elif [ "$1" == "pending" ]; then 40 | curl -s -X POST -H @headers.curl "${GITHUB_API_URL}" -d @pending.json 41 | fi -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # 2 | # https://help.github.com/articles/dealing-with-line-endings/ 3 | # 4 | # Linux start script should use lf 5 | /gradlew text eol=lf 6 | 7 | # These are Windows script files and should use crlf 8 | *.bat text eol=crlf 9 | 10 | -------------------------------------------------------------------------------- /.github/workflows/gradle.yml: 
-------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: [ push,pull_request ] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up JDK 17 13 | uses: actions/setup-java@v1 14 | with: 15 | java-version: 17 16 | java-package: jdk+fx 17 | - name: Build and test with gradle 18 | run: ./gradlew check developTest build 19 | - name: SonarCloud Scan 20 | run: ./gradlew sonarqube -Dsonar.projectKey=mediathekview_MServer -Dsonar.organization=mediathekview -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=$SONAR_TOKEN 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} 24 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - "*" 6 | 7 | jobs: 8 | release: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up JDK 17 14 | uses: actions/setup-java@v1 15 | with: 16 | java-version: 17 17 | java-package: jdk+fx 18 | - name: Cache local Maven repository 19 | uses: actions/cache@v4 20 | with: 21 | path: ~/.m2/repository 22 | key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} 23 | restore-keys: | 24 | ${{ runner.os }}-maven- 25 | 26 | - name: Build and test 27 | run: ./gradlew check developTest build 28 | 29 | - name: SonarCloud Scan 30 | run: ./gradlew sonarqube -Dsonar.projectKey=mediathekview_MServer -Dsonar.organization=mediathekview -Dsonar.host.url=https://sonarcloud.io -Dsonar.login=$SONAR_TOKEN 31 | env: 32 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 33 | SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} 34 | 35 | - name: Publish docker container 36 | run: | 37 | export VERSION=$(find . 
-name "MServer-*.tar.gz" | sed "s/.*MServer-\(.*\).tar.gz/\1/") 38 | docker build . -t mediathekview/mserver:$VERSION -t mediathekview/mserver:latest --build-arg VERSION=$VERSION 39 | echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin 40 | docker push mediathekview/mserver 41 | env: 42 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} 43 | DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} 44 | 45 | - name: Publish GitHub Release 46 | uses: marvinpinto/action-automatic-releases@latest 47 | with: 48 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 49 | automatic_release_tag: "latest" 50 | prerelease: false 51 | files: | 52 | LICENSE.md 53 | build/libs/*.jar 54 | build/distributions/* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist/javadoc 2 | 3 | nbproject 4 | .nb-gradle 5 | 6 | *.class 7 | *.zip 8 | *~ 9 | .gitignore 10 | gitUpdate.sh 11 | 12 | start*.sh 13 | /.gradle/ 14 | /bin/ 15 | .classpath 16 | .project 17 | .settings/ 18 | 19 | .idea 20 | build/ 21 | 22 | /src/main/resources/version.properties 23 | RepoZugang.properties 24 | 25 | logs/ 26 | /target/ 27 | /.checkstyle 28 | 29 | ui/ 30 | 31 | # Ignore Gradle project-specific cache directory 32 | .gradle 33 | 34 | # Ignore Gradle build output directory 35 | build 36 | 37 | devenv/ -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | GIT_SUBMODULE_STRATEGY: recursive 3 | TZ: Europe/Berlin 4 | 5 | #image: maven:3.9-eclipse-temurin-8 6 | image: bellsoft/liberica-openjdk-debian:17 7 | 8 | cache: 9 | paths: 10 | - ./.m2 11 | 12 | 13 | #default: 14 | # before_script: 15 | 16 | 17 | stages: 18 | - .prenotify 19 | # - test 20 | - release 21 | - .postnotify 22 | 23 | notify-github-pending: 24 | stage: .prenotify 25 | when: 
always 26 | script: 27 | - .ci/notify_github.sh pending 28 | 29 | #test: 30 | # stage: test 31 | # script: 32 | # - mvn -B package 33 | # rules: 34 | # - if: '$CI_COMMIT_BRANCH == "develop"' 35 | 36 | Build and Deploy: 37 | stage: release 38 | script: 39 | # - apt-get update -qq 40 | # - apt-get install -y -qq ssh lib32ncurses6 lib32z1 wget tar file gnupg2 git-lfs > /dev/null 41 | # - source /private/MServerENVS 42 | - whoami 43 | # - mkdir ~/.ssh 44 | # - ssh-keyscan -p 60002 dw2.mvorg.de >> ~/.ssh/known_hosts 45 | # - ssh-keyscan -p 60002 148.251.176.136 >> ~/.ssh/known_hosts 46 | # - chmod 644 ~/.ssh/known_hosts 47 | - java -version 48 | # - mvn -v 49 | # - mvn clean 50 | # - mvn -B package 51 | - ./gradlew build 52 | - ls -l build/distributions/ 53 | - cd build/distributions 54 | - "tar -xzf MServer-${CI_COMMIT_TAG}.tar.gz" 55 | - "cd MServer-${CI_COMMIT_TAG}" 56 | - rm -R Copyright/ upload.xml mserver.xml STARTEN__MServer.sh STOPPEN__MServer.sh MServer.sh 57 | - ls -l 58 | - mkdir ../MServer 59 | - mv * ../MServer 60 | - cd ../MServer 61 | - ls -l 62 | - "test -d /var/www/res.mediathekview.de/web/MServer || mkdir /var/www/res.mediathekview.de/web/MServer" 63 | - ls -l /var/www/res.mediathekview.de/web/MServer/ 64 | - tar -czf /var/www/res.mediathekview.de/web/MServer/MServer.tar.gz * 65 | - 'echo "Version vorher: " && (cat /var/www/res.mediathekview.de/web/MServer/MServer.version || echo "Keine Versionsdatei vorhanden")' 66 | - "echo \"${CI_COMMIT_TAG}\" > /var/www/res.mediathekview.de/web/MServer/MServer.version" 67 | - 'echo "Version jetzt: " && cat /var/www/res.mediathekview.de/web/MServer/MServer.version' 68 | tags: 69 | - mvweb1 70 | rules: 71 | - if: $CI_COMMIT_TAG == "latest" 72 | when: never 73 | - if: $CI_COMMIT_TAG 74 | 75 | 76 | notify-github-success: 77 | stage: .postnotify 78 | when: on_success 79 | script: 80 | - .ci/notify_github.sh success 81 | 82 | notify-github-failure: 83 | stage: .postnotify 84 | when: on_failure 85 | script: 86 | - 
.ci/notify_github.sh failure -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM "openjdk:8-jre-slim" 2 | MAINTAINER "MediathekView " 3 | 4 | # On Build 5 | ARG VERSION 6 | ARG BASE_DIR="/opt/MServer" 7 | 8 | # On Run 9 | ENV MIN_RAM "256M" 10 | ENV MAX_RAM "2G" 11 | 12 | RUN mkdir -p $BASE_DIR 13 | WORKDIR $BASE_DIR 14 | 15 | ADD ./build/distributions/MServer-$VERSION.tar.gz . 16 | RUN mkdir config && \ 17 | mv MServer-$VERSION/* . && \ 18 | mv mserver.xml config/ && \ 19 | mv bannedFilmList.txt config/ && \ 20 | mv upload.xml config/ && \ 21 | mv live-streams.json config/ 22 | 23 | VOLUME /opt/MServer/config 24 | CMD java -Xms$MIN_RAM -Xmx$MAX_RAM -jar ./MServer.jar config -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](http://www.gnu.org/licenses/gpl-3.0) 2 | [![Build status](https://github.com/mediathekview/MServer/workflows/Build%20and%20test/badge.svg?branch=master)](https://github.com/mediathekview/MServer/actions?query=workflow%3A%22Build+and+test%22+branch%3Amaster) 3 | [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=mediathekview_MServer&metric=alert_status)](https://sonarcloud.io/dashboard?id=mediathekview_MServer) 4 | 5 | # MServer 6 | Server zum crawlen der Mediatheken. Teil von [MediathekView](https://github.com/mediathekview). 
7 | 8 | # Entwicklung 9 | 10 | ## Code auschecken 11 | ```bash 12 | mkdir mediathekview 13 | cd mediathekview 14 | git clone https://github.com/mediathekview/MServer.git 15 | git clone https://github.com/mediathekview/Mlib.git 16 | ``` 17 | 18 | ## Bauen und starten an der Kommandozeile 19 | ```bash 20 | cd MServer 21 | ./gradlew start 22 | ``` 23 | 24 | ## Einstellungen 25 | 26 | Kompression zu xz Datei deaktivieren: 27 | ```bash 28 | export NOCOMPRESS=y 29 | java -jar MServer.jar 30 | ``` 31 | 32 | ## Entwicklung mit Netbeans 33 | * Verzeichnisse `MLib` und `MServer` mit Netbeans öffnen 34 | 35 | ## Entwicklung mit Eclipse 36 | * Falls noch nicht vorhanden: [Plugin buildship](https://projects.eclipse.org/projects/tools.buildship) installieren 37 | * Projekt `MServer` als Gradle-Projekt importieren. `MLib` wird automatisch mit importiert. 38 | 39 | 40 | # Überblick der Crawler 41 | 42 | | Crawler | liest Mediathek | beinhaltet Sender | bestückt Sender | entspricht Develop | 43 | |---------|-----------|--------|---------|--| 44 | | 3sat|3sat-Mediathek|3sat |3sat|x| 45 | | ARD|ARD-Mediathek|Alpha, BR, Das Erste, HR, MDR, NDR, ONE, Radio Bremen, RBB, SR, SWR, WDR, tagesschau24|ARD, BR, HR, MDR, NDR, Radio Bremen, RBB, SWR, WDR| x| 46 | | ARTE|ARTE-Mediathek|ARTE in DE, FR, EN, ES, PL, IT|ARTE.DE, ARTE.FR|| 47 | | DW|DW-Mediathek|DW|DW|x| 48 | | FUNK | FUNK-Webseite | FUNK |FUNK |x| 49 | | KIKA|KIKA-Mediathek|KIKA|KIKA|x| 50 | | ORF|ORF-Mediathek|ORF1, ORF2, ORF3, ORFSport|ORF|x| 51 | | PHOENIX|PHOENIX-Mediathek|PHOENIX|PHOENIX|x| 52 | | SR|SR-Mediathek|SR|SR|x| 53 | | SRF|SRF-Mediathek|SRF1, SRF2, SRFinfo|SRF|x| 54 | | ZDF|ZDF-Mediathek|ZDF, ZDFneo, ZDFinfo|ZDF|x| 55 | -------------------------------------------------------------------------------- /dist/MServer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | dir=`dirname "$0"` 4 | cd "$dir" 5 | 6 | # falls das Programm vom letzten Start noch läuft 7 | # wird 
es beendet 8 | while [ "$(ps aux | grep "[M]Server.jar")" ] 9 | do 10 | echo 11 | echo =========================================== 12 | echo =========================================== 13 | echo 14 | echo da lauft noch was!!!!!! 15 | echo 16 | echo =========================================== 17 | echo =========================================== 18 | echo 19 | 20 | # Sollte noch ein gleichnamiger Prozess laufen wird er hiermit beendet 21 | kill $(pgrep -f MServer.jar) > /dev/null 2>&1 22 | ps aux | grep "[M]Server.jar" | kill -9 $(cut -c 10-14) > /dev/null 2>&1 23 | done 24 | 25 | 26 | # jetzt gehts mit dem Start weiter 27 | echo %% 28 | echo %% ------------------------------------- 29 | echo %% Pfad: $dir 30 | echo %% ------------------------------------- 31 | echo %% 32 | 33 | 34 | if [ -n "$JAVA_HOME" ]; then 35 | $JAVA_HOME/bin/java -XX:+UseG1GC -XX:+UseStringDeduplication -Xmx3G -jar ./MServer.jar $dir $* 36 | else 37 | java -XX:+UseG1GC -XX:+UseStringDeduplication -Xmx3G -jar ./MServer.jar $dir $* 38 | fi 39 | 40 | cd $OLDPWD 41 | 42 | echo %% und Tschuess 43 | echo %% ----------------------------------------- 44 | exit 0 45 | 46 | 47 | -------------------------------------------------------------------------------- /dist/STARTEN__MServer.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | # 3 | 4 | dir=`dirname "$0"` 5 | cd "$dir" 6 | 7 | ./MServer.sh >> ./direktStart__`date "+%Y.%m.%d"`.log & 8 | 9 | cd $OLDPWD 10 | echo %% und Tschuess 11 | echo %% ----------------------------------------- 12 | exit 0 13 | 14 | -------------------------------------------------------------------------------- /dist/STOPPEN__MServer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # falls das Programm vom letzten Start noch läuft 4 | # wird es beendet 5 | while [ "$(ps aux | grep "[M]Server.jar")" ] 6 | do 7 | echo 8 | echo =========================================== 9 | echo =========================================== 10 | echo 11 | echo da lauft noch was!!!!!! 12 | echo 13 | echo =========================================== 14 | echo =========================================== 15 | echo 16 | 17 | # Sollte noch ein gleichnamiger Prozess laufen wird er hiermit beendet 18 | kill $(pgrep -f MServer.jar) > /dev/null 2>&1 19 | ps aux | grep "[M]Server.jar" | kill -9 $(cut -c 10-14) > /dev/null 2>&1 20 | done 21 | 22 | 23 | echo %% und Tschuess 24 | echo %% ----------------------------------------- 25 | exit 0 26 | 27 | 28 | -------------------------------------------------------------------------------- /dist/bannedFilmList.txt: -------------------------------------------------------------------------------- 1 | Geschichte einer Liebe - Freya 2 | Wir haben genug - Wirtschaft ohne Wachstum 3 | Auslegung der Wirklichkeit - Georg Stefan Troller - Dokumentarfilm von Ruth Rieser, Österreich 2021 4 | 5 | -------------------------------------------------------------------------------- /dist/mserver.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | MServer 5 | 6 | 7 | http://verteiler1.mediathekview.de/Filmliste-diff.xz 8 | 9 | 10 | 11 | http://verteiler1.mediathekview.de/Filmliste-akt.xz 12 | 13 | 14 | live-streams.json 15 | 16 | 20 | 21 | 22 | 23 | 
filme-org.xz 24 | 25 | 26 | file:bannedFilmList.txt 27 | 28 | ARD,ZDF,ARTE,DW,KIKA,FUNK,3SAT,SR,SRF,SRFPOD,ORF,PHONIX 29 | 30 | 31 | 32 | 1 33 | 1.0 34 | 35 | 36 | 37 | 38 | 39 | 40 | lang 41 | 42 | 43 | neu 44 | 45 | 46 | sofort 47 | 48 | 49 | 1 50 | 51 | 52 | true 53 | 54 | 55 | 56 | 57 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /dist/upload.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mediathekview/MServer/2e4b291c8ae2da221369245368bdeb6f6849d827/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /jdk8.sh: -------------------------------------------------------------------------------- 1 | # einlesen mit: 2 | # source jdk8.sh 3 | 4 | #PFAD="/usr/lib/jvm/java-8-openjdk-amd64" 5 | 6 | #export J2SDKDIR=$PFAD 7 | #export J2REDIR=$PFAD 8 | #export PATH=$PATH:${PFAD}/bin:${PFAD}/db/bin 9 | #export JAVA_HOME=${PFAD} 10 | #export DERBY_HOME=${PFAD}/db 11 | 12 | 13 | # with sdkman 14 | 15 | sdk use java 8.0.382.fx-zulu 16 | 17 | # list sdk's for java 18 | sdk list java 19 | -------------------------------------------------------------------------------- /settings.gradle: 
-------------------------------------------------------------------------------- 1 | rootProject.name = 'MServer' 2 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/BannedFilmFilter.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.io.InputStreamReader; 7 | import java.net.HttpURLConnection; 8 | import java.net.URL; 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | import java.util.zip.GZIPInputStream; 12 | 13 | import de.mediathekview.mlib.daten.DatenFilm; 14 | import de.mediathekview.mlib.tool.Log; 15 | import mServer.tool.MserverDaten; 16 | import mServer.tool.MserverKonstanten; 17 | 18 | public class BannedFilmFilter { 19 | private final List bannedTitles; 20 | 21 | public BannedFilmFilter() { 22 | Log.progress("create BannedFilmFilter from " + MserverDaten.system[MserverKonstanten.SYSTEM_BANNEDFILMLIST_NR] ); 23 | bannedTitles = new ArrayList<>(); 24 | try ( 25 | InputStream is = getInputStreamFromPath(MserverDaten.system[MserverKonstanten.SYSTEM_BANNEDFILMLIST_NR]); 26 | InputStreamReader isr = new InputStreamReader(is); 27 | BufferedReader reader = new BufferedReader(isr); 28 | ) { 29 | String line = ""; 30 | while ((line = reader.readLine()) != null) { 31 | if (!line.trim().isEmpty()) { 32 | bannedTitles.add(line.trim()); 33 | Log.progress("add entry to bannedFilmList"); 34 | } 35 | } 36 | } catch (IOException e) { 37 | Log.errorLog(-1, e); 38 | } 39 | } 40 | 41 | public boolean isBanned(final DatenFilm film) { 42 | for (String title : bannedTitles) { 43 | if (film.arr[DatenFilm.FILM_TITEL].toUpperCase().contains(title.toUpperCase())) { 44 | return true; 45 | } 46 | } 47 | return false; 48 | } 49 | 50 | public static InputStream getInputStreamFromPath(String path) throws IOException { 51 | 
InputStream is; 52 | String protocol = path.replaceFirst("^(\\w+):.+$", "$1").toLowerCase(); 53 | switch (protocol) { 54 | case "http": 55 | case "https": 56 | HttpURLConnection connection = (HttpURLConnection) new URL(path).openConnection(); 57 | int code = connection.getResponseCode(); 58 | if (code >= 400) throw new IOException("Server returned error code #" + code); 59 | is = connection.getInputStream(); 60 | String contentEncoding = connection.getContentEncoding(); 61 | if (contentEncoding != null && contentEncoding.equalsIgnoreCase("gzip")) 62 | is = new GZIPInputStream(is); 63 | break; 64 | case "file": 65 | is = new URL(path).openStream(); 66 | break; 67 | case "classpath": 68 | is = Thread.currentThread().getContextClassLoader().getResourceAsStream(path.replaceFirst("^\\w+:", "")); 69 | break; 70 | default: 71 | throw new IOException("Missed or unsupported protocol in path '" + path + "'"); 72 | } 73 | return is; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/CrawlerConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package mServer.crawler; 7 | 8 | /** 9 | * 10 | * @author emil 11 | */ 12 | public class CrawlerConfig { 13 | 14 | //alle Programmeinstellungen 15 | public static String proxyUrl = ""; 16 | public static int proxyPort = -1; 17 | public static String importLive = ""; // live-streams 18 | public static String importUrl_2__anhaengen = ""; 19 | public static String importOld = ""; // alte Liste importieren 20 | public static String importAkt = ""; // aktuelle Liste eines anderen Crawler importieren 21 | public static String importUrl_1__anhaengen = ""; 22 | public static final int LOAD_SHORT = 0; 23 | public static final int LOAD_LONG = 1; 24 | public static int senderLoadHow = LOAD_SHORT; 25 | public static final int LOAD_MAX = 2; 26 | public static boolean updateFilmliste = false; // die bestehende Filmliste wird aktualisiert und bleibt erhalten 27 | public static boolean orgFilmlisteErstellen = false; // dann wird eine neue Org-Liste angelegt, typ. die erste Liste am Tag 28 | public static String orgFilmliste = ""; // OrgFilmliste, zum Erstellen des Diff, angelegt wird sie immer im Ordner der Filmlisten, wenn leer wird die eigene Org-Liste gesucht 29 | public static String[] nurSenderLaden = null; // es wird nur dieser Sender geladen => "senderAllesLaden"=false, "updateFilmliste"=true 30 | // Verzeichnis zum Speichern der Programmeinstellungen 31 | public static String dirFilme = ""; // Pfad mit den Filmlisten 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/gui/Data.java: -------------------------------------------------------------------------------- 1 | /* 2 | * MediathekView 3 | * Copyright (C) 2016 W. 
Xaver 4 | * W.Xaver[at]googlemail.com 5 | * http://zdfmediathk.sourceforge.net/ 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | package mServer.crawler.gui; 21 | 22 | import de.mediathekview.mlib.daten.ListeFilme; 23 | 24 | public class Data { 25 | 26 | public static ListeFilme listeFilme = new ListeFilme(); 27 | public static String pathFilmlist = ""; 28 | public static MSearchGuiController mlibGuiController = null; 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/gui/MSG.java: -------------------------------------------------------------------------------- 1 | /* 2 | * MediathekView 3 | * Copyright (C) 2016 W. Xaver 4 | * W.Xaver[at]googlemail.com 5 | * http://zdfmediathk.sourceforge.net/ 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 
16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program. If not, see . 19 | */ 20 | package mServer.crawler.gui; 21 | 22 | import java.util.List; 23 | 24 | import javafx.application.Application; 25 | import javafx.fxml.FXMLLoader; 26 | import javafx.scene.Parent; 27 | import javafx.scene.Scene; 28 | import javafx.stage.Stage; 29 | 30 | public class MSG extends Application { 31 | 32 | @Override 33 | public void start(Stage stage) throws Exception { 34 | Parent root = FXMLLoader.load(getClass().getResource("/mServer/crawler/gui/MSearchGui.fxml")); 35 | Scene scene = new Scene(root); 36 | stage.setScene(scene); 37 | stage.setTitle("MSearchGui"); 38 | stage.show(); 39 | } 40 | 41 | @Override 42 | public void init() { 43 | List lp = getParameters().getRaw(); 44 | if (!lp.isEmpty() && !lp.get(0).startsWith("-")) { 45 | Data.pathFilmlist = lp.get(0); 46 | } 47 | } 48 | 49 | /** 50 | * @param args the command line arguments 51 | */ 52 | public static void main(String[] args) { 53 | launch(args); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ConfigReader.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender; 2 | 3 | /** 4 | * Loads the config Details. 5 | */ 6 | public class ConfigReader 7 | { 8 | 9 | //TODO 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/CrawlerConfig.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender; 2 | 3 | 4 | public class CrawlerConfig 5 | { 6 | 7 | //TODO POJO which hols all needed codfigs. 
8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/Film.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender; 2 | 3 | import java.net.URI; 4 | import java.util.Map; 5 | import java.util.concurrent.ConcurrentHashMap; 6 | 7 | import mServer.crawler.sender.base.Qualities; 8 | 9 | /** 10 | * Represents a found film. 11 | */ 12 | public class Film 13 | { 14 | private Map urls; 15 | 16 | public Film() 17 | { 18 | urls = new ConcurrentHashMap<>(); 19 | } 20 | 21 | public void addUrl(Qualities aQuality, URI aUrl) 22 | { 23 | if(aQuality != null && aUrl != null) 24 | { 25 | urls.put(aQuality,aUrl); 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/ArdConstants.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard; 2 | 3 | public class ArdConstants { 4 | 5 | public static final String API_URL = "https://api.ardmediathek.de"; 6 | public static final String BASE_URL = "https://api.ardmediathek.de/public-gateway"; 7 | 8 | public static final String ITEM_URL = API_URL + "/page-gateway/pages/ard/item/%s?embedded=true&mcV6=true"; 9 | 10 | public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/editorial/experiment-a-z?embedded=false"; 11 | public static final String TOPICS_COMPILATION_URL = API_URL + "/page-gateway/widgets/%s/editorials/%s?pageNumber=0&pageSize=%s"; 12 | public static final String TOPIC_URL = API_URL + "/page-gateway/widgets/ard/asset/%s?pageSize=%d"; 13 | public static final String DAY_PAGE_URL = "https://programm-api.ard.de/program/api/program?day=%s&channelIds=%s&mode=channel"; 14 | 15 | public static final int TOPICS_COMPILATION_PAGE_SIZE = 200; 16 | public static final int TOPIC_PAGE_SIZE = 50; 17 | 18 | public static final 
String DEFAULT_CLIENT = "ard"; 19 | 20 | public static final String[] CLIENTS = 21 | new String[] { 22 | "daserste", 23 | "br", 24 | "hr", 25 | "mdr", 26 | "ndr", 27 | "radiobremen", 28 | "rbb", 29 | "sr", 30 | "swr", 31 | "wdr", 32 | "one", 33 | "alpha", 34 | "tagesschau24", 35 | "funk", 36 | "phoenix" 37 | }; 38 | 39 | public static final String[] IGNORED_SENDER = new String[] {"zdf", "kika", "3sat", "arte"}; 40 | 41 | public static final String WEBSITE_URL = "https://www.ardmediathek.de/video/%s"; 42 | 43 | public static final String BASE_URL_SUBTITLES = "https://classic.ardmediathek.de"; 44 | 45 | private ArdConstants() {} 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/ArdFilmDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard; 2 | 3 | import de.mediathekview.mlib.daten.DatenFilm; 4 | import java.util.HashSet; 5 | import java.util.Set; 6 | 7 | public class ArdFilmDto { 8 | 9 | private final DatenFilm film; 10 | private final Set relatedFilms; 11 | 12 | public ArdFilmDto(final DatenFilm film) { 13 | this.film = film; 14 | this.relatedFilms = new HashSet<>(); 15 | } 16 | 17 | public DatenFilm getFilm() { 18 | return film; 19 | } 20 | 21 | public Set getRelatedFilms() { 22 | return relatedFilms; 23 | } 24 | 25 | public void addRelatedFilm(final ArdFilmInfoDto filmInfoDto) { 26 | relatedFilms.add(filmInfoDto); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/ArdFilmInfoDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard; 2 | 3 | import java.util.Objects; 4 | import mServer.crawler.sender.base.CrawlerUrlDTO; 5 | 6 | public class ArdFilmInfoDto extends CrawlerUrlDTO { 7 | 8 | private final String id; 9 | private final int numberOfClips; 
10 | 11 | public ArdFilmInfoDto(String id, String aUrl, int numberOfClips) { 12 | super(aUrl); 13 | 14 | this.id = id; 15 | this.numberOfClips = numberOfClips; 16 | } 17 | 18 | public String getId() { 19 | return id; 20 | } 21 | 22 | public int getNumberOfClips() { 23 | return numberOfClips; 24 | } 25 | 26 | @Override 27 | public boolean equals(Object o) { 28 | if (this == o) { 29 | return true; 30 | } 31 | if (!(o instanceof ArdFilmInfoDto)) { 32 | return false; 33 | } 34 | if (!super.equals(o)) { 35 | return false; 36 | } 37 | ArdFilmInfoDto that = (ArdFilmInfoDto) o; 38 | return numberOfClips == that.numberOfClips 39 | && Objects.equals(id, that.id); 40 | } 41 | 42 | @Override 43 | public int hashCode() { 44 | return Objects.hash(super.hashCode(), id, numberOfClips); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/ArdFilmUrlInfoDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard; 2 | 3 | import mServer.crawler.sender.base.FilmUrlInfoDto; 4 | 5 | public class ArdFilmUrlInfoDto extends FilmUrlInfoDto { 6 | 7 | private final String quality; 8 | 9 | public ArdFilmUrlInfoDto(final String aUrl, final String aQuality) { 10 | super(aUrl); 11 | quality = aQuality; 12 | } 13 | 14 | public String getQuality() { 15 | return quality; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/ArdTopicInfoDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard; 2 | 3 | import java.util.Objects; 4 | import java.util.Set; 5 | 6 | public class ArdTopicInfoDto { 7 | private final Set filmInfos; 8 | private String id; 9 | private int subPageNumber; 10 | private int maxSubPageNumber; 11 | 12 | public ArdTopicInfoDto(final Set filmInfos) { 13 | this.filmInfos = 
filmInfos; 14 | subPageNumber = 0; 15 | maxSubPageNumber = 0; 16 | } 17 | 18 | public Set getFilmInfos() { 19 | return filmInfos; 20 | } 21 | 22 | public String getId() { 23 | return id; 24 | } 25 | 26 | public void setId(String id) { 27 | this.id = id; 28 | } 29 | 30 | public int getSubPageNumber() { 31 | return subPageNumber; 32 | } 33 | 34 | public void setSubPageNumber(final int subPageNumber) { 35 | this.subPageNumber = subPageNumber; 36 | } 37 | 38 | public int getMaxSubPageNumber() { 39 | return maxSubPageNumber; 40 | } 41 | 42 | public void setMaxSubPageNumber(final int maxSubPageNumber) { 43 | this.maxSubPageNumber = maxSubPageNumber; 44 | } 45 | 46 | @Override 47 | public boolean equals(Object o) { 48 | if (this == o) return true; 49 | if (o == null || getClass() != o.getClass()) return false; 50 | ArdTopicInfoDto that = (ArdTopicInfoDto) o; 51 | return Objects.equals(id, that.id) && subPageNumber == that.subPageNumber && maxSubPageNumber == that.maxSubPageNumber && Objects.equals(filmInfos, that.filmInfos); 52 | } 53 | 54 | @Override 55 | public int hashCode() { 56 | return Objects.hash(id, filmInfos, subPageNumber, maxSubPageNumber); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/ArdVideoDTO.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import mServer.crawler.sender.base.Qualities; 6 | 7 | public class ArdVideoDTO { 8 | private final Map videoUrls; 9 | 10 | public ArdVideoDTO() { 11 | videoUrls = new HashMap<>(); 12 | } 13 | 14 | public void addVideo(Qualities aQualitie, String aUrl) 15 | { 16 | videoUrls.put(aQualitie,aUrl); 17 | } 18 | 19 | public Map getVideoUrls() 20 | { 21 | return videoUrls; 22 | } 23 | 24 | public String getUrl(Qualities aQualitie) 25 | { 26 | return videoUrls.get(aQualitie); 27 | } 28 | } 29 | 
-------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/PaginationUrlDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard; 2 | 3 | import mServer.crawler.sender.base.CrawlerUrlDTO; 4 | 5 | import java.util.HashSet; 6 | import java.util.Set; 7 | 8 | public class PaginationUrlDto { 9 | private final Set urls = new HashSet<>(); 10 | private int actualPage; 11 | private int maxPages; 12 | 13 | public void addUrl(CrawlerUrlDTO url) { 14 | urls.add(url); 15 | } 16 | 17 | public void addAll(Set urls) { 18 | this.urls.addAll(urls); 19 | } 20 | 21 | public Set getUrls() { 22 | return urls; 23 | } 24 | 25 | public int getActualPage() { 26 | return actualPage; 27 | } 28 | 29 | public int getMaxPages() { 30 | return maxPages; 31 | } 32 | 33 | public void setActualPage(int actualPage) { 34 | this.actualPage = actualPage; 35 | } 36 | 37 | public void setMaxPages(int maxPages) { 38 | this.maxPages = maxPages; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/json/ArdDayPageDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard.json; 2 | 3 | import com.google.gson.*; 4 | 5 | import mServer.crawler.sender.ard.ArdConstants; 6 | import mServer.crawler.sender.ard.ArdFilmInfoDto; 7 | import mServer.crawler.sender.base.JsonUtils; 8 | 9 | import java.lang.reflect.Type; 10 | import java.util.HashSet; 11 | import java.util.Optional; 12 | import java.util.Set; 13 | 14 | public class ArdDayPageDeserializer implements JsonDeserializer> { 15 | 16 | private static final String ELEMENT_CHANNELS = "channels"; 17 | private static final String ELEMENT_LINKS = "links"; 18 | private static final String ELEMENT_TARGET = "target"; 19 | private static final String ELEMENT_TIMESLOTS = 
"timeSlots"; 20 | private static final String ATTRIBUTE_URL_ID = "urlId"; 21 | 22 | @Override 23 | public Set deserialize( 24 | final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) { 25 | final Set results = new HashSet<>(); 26 | 27 | final JsonObject jsonObject = jsonElement.getAsJsonObject(); 28 | if (jsonObject.has(ELEMENT_CHANNELS)) { 29 | final JsonArray channels = jsonObject.get(ELEMENT_CHANNELS).getAsJsonArray(); 30 | results.addAll(parseChannels(channels)); 31 | } 32 | 33 | return results; 34 | } 35 | 36 | private Set parseChannels(JsonArray channels) { 37 | Set entries = new HashSet<>(); 38 | for (JsonElement channel : channels) { 39 | final JsonArray timeSlots = channel.getAsJsonObject().get(ELEMENT_TIMESLOTS).getAsJsonArray(); 40 | for (JsonElement timeSlot : timeSlots) { 41 | for (JsonElement entry : timeSlot.getAsJsonArray()) { 42 | final JsonObject entryObject = entry.getAsJsonObject(); 43 | final Optional id = toId(entryObject); 44 | id.ifPresent(s -> entries.add(createFilmInfo(s, 1))); 45 | } 46 | } 47 | } 48 | return entries; 49 | } 50 | 51 | private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) { 52 | final String url = String.format(ArdConstants.ITEM_URL, id); 53 | return new ArdFilmInfoDto(id, url, numberOfClips); 54 | } 55 | 56 | private Optional toId(final JsonObject teaserObject) { 57 | if (JsonUtils.checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET)) { 58 | final JsonObject targetObject = 59 | teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject(); 60 | return JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_URL_ID); 61 | } 62 | return JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_URL_ID); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/json/ArdErrorDeserializer.java: 
-------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard.json; 2 | 3 | import com.google.gson.*; 4 | 5 | import mServer.crawler.sender.base.JsonUtils; 6 | 7 | import java.lang.reflect.Type; 8 | import java.util.Optional; 9 | 10 | public class ArdErrorDeserializer implements JsonDeserializer> { 11 | 12 | private static final String ATTRIBUTE_CODE = "code"; 13 | private static final String ATTRIBUTE_MESSAGE = "message"; 14 | private static final String ELEMENT_ERRORS = "errors"; 15 | private static final String ELEMENT_EXTENSIONS = "extensions"; 16 | 17 | @Override 18 | public Optional deserialize( 19 | final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) { 20 | 21 | if (!JsonUtils.hasElements(jsonElement, ELEMENT_ERRORS)) { 22 | return Optional.empty(); 23 | } 24 | 25 | final JsonArray errors = jsonElement.getAsJsonObject().get(ELEMENT_ERRORS).getAsJsonArray(); 26 | if (errors.size() > 0) { 27 | return parseError(errors.get(0).getAsJsonObject()); 28 | } 29 | 30 | return Optional.empty(); 31 | } 32 | 33 | private Optional parseError(final JsonObject error) { 34 | final Optional message = JsonUtils.getAttributeAsString(error, ATTRIBUTE_MESSAGE); 35 | Optional code = Optional.empty(); 36 | 37 | if (JsonUtils.hasElements(error, ELEMENT_EXTENSIONS)) { 38 | code = 39 | JsonUtils.getAttributeAsString( 40 | error.get(ELEMENT_EXTENSIONS).getAsJsonObject(), ATTRIBUTE_CODE); 41 | } 42 | 43 | final ArdErrorInfoDto result = new ArdErrorInfoDto(code.orElse(""), message.orElse("")); 44 | return Optional.of(result); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/json/ArdErrorInfoDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard.json; 2 | 3 | import java.util.Objects; 4 | 5 | public class ArdErrorInfoDto { 6 | 7 
| private final String code; 8 | private final String message; 9 | 10 | public ArdErrorInfoDto(final String code, final String message) { 11 | this.code = code; 12 | this.message = message; 13 | } 14 | 15 | public String getMessage() { 16 | return message; 17 | } 18 | 19 | public String getCode() { 20 | return code; 21 | } 22 | 23 | @Override 24 | public boolean equals(final Object o) { 25 | if (this == o) { 26 | return true; 27 | } 28 | if (!(o instanceof ArdErrorInfoDto that)) { 29 | return false; 30 | } 31 | return Objects.equals(code, that.code) && 32 | Objects.equals(message, that.message); 33 | } 34 | 35 | @Override 36 | public int hashCode() { 37 | return Objects.hash(code, message); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/json/ArdTeasersDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard.json; 2 | 3 | import com.google.gson.JsonArray; 4 | import com.google.gson.JsonElement; 5 | import com.google.gson.JsonObject; 6 | import mServer.crawler.sender.ard.ArdConstants; 7 | import mServer.crawler.sender.ard.ArdFilmInfoDto; 8 | import mServer.crawler.sender.base.JsonUtils; 9 | 10 | import java.util.Objects; 11 | import java.util.Optional; 12 | import java.util.Set; 13 | import java.util.stream.Collectors; 14 | import java.util.stream.StreamSupport; 15 | 16 | abstract class ArdTeasersDeserializer { 17 | 18 | private static final String ELEMENT_LINKS = "links"; 19 | private static final String ELEMENT_TARGET = "target"; 20 | 21 | private static final String ATTRIBUTE_ID = "id"; 22 | private static final String ATTRIBUTE_NUMBER_OF_CLIPS = "numberOfClips"; 23 | 24 | Set parseTeasers(final JsonArray teasers) { 25 | return StreamSupport.stream(teasers.spliterator(), true) 26 | .map(JsonElement::getAsJsonObject) 27 | .map(this::toFilmInfo) 28 | .filter(Objects::nonNull) 29 | 
.collect(Collectors.toSet()); 30 | } 31 | 32 | private ArdFilmInfoDto toFilmInfo(final JsonObject teaserObject) { 33 | return toId(teaserObject) 34 | .map(id -> createFilmInfo(id, getNumberOfClips(teaserObject))) 35 | .orElse(null); 36 | } 37 | 38 | private int getNumberOfClips(final JsonObject teaserObject) { 39 | if (teaserObject.has(ATTRIBUTE_NUMBER_OF_CLIPS)) { 40 | return teaserObject.get(ATTRIBUTE_NUMBER_OF_CLIPS).getAsInt(); 41 | } 42 | return 0; 43 | } 44 | 45 | private Optional toId(final JsonObject teaserObject) { 46 | if (JsonUtils.checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET)) { 47 | final JsonObject targetObject = 48 | teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject(); 49 | return JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_ID); 50 | } 51 | return JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID); 52 | } 53 | 54 | private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) { 55 | final String url = String.format(ArdConstants.ITEM_URL, id); 56 | return new ArdFilmInfoDto(id, url, numberOfClips); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/json/ArdTopicPageDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard.json; 2 | 3 | import com.google.gson.*; 4 | import mServer.crawler.sender.ard.ArdFilmInfoDto; 5 | import mServer.crawler.sender.ard.ArdTopicInfoDto; 6 | import mServer.crawler.sender.base.JsonUtils; 7 | 8 | import java.lang.reflect.Type; 9 | import java.util.HashSet; 10 | import java.util.Optional; 11 | import java.util.Set; 12 | 13 | public class ArdTopicPageDeserializer extends ArdTeasersDeserializer 14 | implements JsonDeserializer { 15 | 16 | private static final String ELEMENT_ID = "id"; 17 | private static final String ELEMENT_TEASERS = "teasers"; 18 | private static final 
String ELEMENT_PAGE_NUMBER = "pageNumber"; 19 | private static final String ELEMENT_TOTAL_ELEMENTS = "totalElements"; 20 | private static final String ELEMENT_PAGE_SIZE = "pageSize"; 21 | private static final String ELEMENT_PAGINATION = "pagination"; 22 | 23 | @Override 24 | public ArdTopicInfoDto deserialize( 25 | final JsonElement showPageElement, final Type type, final JsonDeserializationContext context) { 26 | final Set results = new HashSet<>(); 27 | final ArdTopicInfoDto ardTopicInfoDto = new ArdTopicInfoDto(results); 28 | 29 | final JsonObject showPageObject = showPageElement.getAsJsonObject(); 30 | if (showPageObject.has(ELEMENT_TEASERS)) { 31 | final JsonArray teasers = showPageObject.get(ELEMENT_TEASERS).getAsJsonArray(); 32 | results.addAll(parseTeasers(teasers)); 33 | } 34 | final Optional id = JsonUtils.getAttributeAsString(showPageObject, ELEMENT_ID); 35 | id.ifPresent(ardTopicInfoDto::setId); 36 | 37 | final JsonElement paginationElement = showPageObject.get(ELEMENT_PAGINATION); 38 | ardTopicInfoDto.setSubPageNumber( 39 | getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_PAGE_NUMBER)); 40 | final int totalElements = getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_TOTAL_ELEMENTS); 41 | final int pageSize = getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_PAGE_SIZE); 42 | ardTopicInfoDto.setMaxSubPageNumber(pageSize == 0 ? 
0 : 43 | (totalElements + pageSize - 1) / pageSize); 44 | 45 | return ardTopicInfoDto; 46 | } 47 | 48 | private int getChildElementAsIntOrNullIfNotExist( 49 | final JsonElement parentElement, final String childElementName) { 50 | if (parentElement == null || parentElement.isJsonNull()) { 51 | return 0; 52 | } 53 | return getJsonElementAsIntOrNullIfNotExist( 54 | parentElement.getAsJsonObject().get(childElementName)); 55 | } 56 | 57 | private int getJsonElementAsIntOrNullIfNotExist(final JsonElement element) { 58 | if (element.isJsonNull()) { 59 | return 0; 60 | } 61 | return element.getAsInt(); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard.json; 2 | 3 | 4 | import com.google.gson.JsonArray; 5 | import com.google.gson.JsonDeserializationContext; 6 | import com.google.gson.JsonDeserializer; 7 | import com.google.gson.JsonElement; 8 | import mServer.crawler.sender.ard.ArdConstants; 9 | import mServer.crawler.sender.base.CrawlerUrlDTO; 10 | import mServer.crawler.sender.base.JsonUtils; 11 | 12 | import java.lang.reflect.Type; 13 | import java.util.HashSet; 14 | import java.util.Optional; 15 | import java.util.Set; 16 | 17 | public class ArdTopicsDeserializer implements JsonDeserializer> { 18 | private static final String ELEMENT_WIDGETS = "widgets"; 19 | private static final String ELEMENT_LINKS = "links"; 20 | private static final String ELEMENT_SELF = "self"; 21 | 22 | private static final String ATTRIBUTE_ID = "id"; 23 | private final String sender; 24 | 25 | public ArdTopicsDeserializer(String sender) { 26 | this.sender = sender; 27 | } 28 | 29 | @Override 30 | public Set deserialize( 31 | JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) { 32 | final Set result = new HashSet<>(); 
33 | 34 | if (JsonUtils.hasElements(jsonElement, ELEMENT_WIDGETS)) { 35 | final JsonArray widgets = jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_WIDGETS); 36 | widgets.forEach(widget -> parseWidget(widget.getAsJsonObject()).ifPresent(result::add)); 37 | } 38 | 39 | return result; 40 | } 41 | 42 | private Optional parseWidget(final JsonElement compilation) { 43 | if (JsonUtils.hasElements(compilation, ELEMENT_LINKS)) { 44 | final JsonElement selfLink = 45 | compilation.getAsJsonObject().get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_SELF); 46 | final Optional id = 47 | JsonUtils.getAttributeAsString(selfLink.getAsJsonObject(), ATTRIBUTE_ID); 48 | if (id.isPresent()) { 49 | return Optional.of( 50 | new CrawlerUrlDTO( 51 | String.format( 52 | ArdConstants.TOPICS_COMPILATION_URL, 53 | sender, 54 | id.get(), 55 | ArdConstants.TOPICS_COMPILATION_PAGE_SIZE))); 56 | } 57 | } 58 | 59 | return Optional.empty(); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/json/ArdVideoInfoDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard.json; 2 | 3 | import java.util.EnumMap; 4 | import java.util.Map; 5 | import java.util.Optional; 6 | import mServer.crawler.sender.base.Qualities; 7 | import org.apache.commons.lang3.StringUtils; 8 | 9 | /** 10 | * Video information from 11 | * {@literal http://www.ardmediathek.de/play/media/[documentId]?devicetype=pc&features=flash}. 
12 | */ 13 | public class ArdVideoInfoDto { 14 | 15 | private final Map videoUrls; 16 | private String subtitleUrl; 17 | 18 | public ArdVideoInfoDto() { 19 | videoUrls = new EnumMap<>(Qualities.class); 20 | } 21 | 22 | public Qualities getDefaultQuality() { 23 | if (videoUrls.containsKey(Qualities.NORMAL)) { 24 | return Qualities.NORMAL; 25 | } 26 | return videoUrls.keySet().iterator().next(); 27 | } 28 | 29 | public String getDefaultVideoUrl() { 30 | return videoUrls.get(getDefaultQuality()); 31 | } 32 | 33 | public String getSubtitleUrl() { 34 | return subtitleUrl; 35 | } 36 | 37 | public Optional getSubtitleUrlOptional() { 38 | if (StringUtils.isNotBlank(subtitleUrl)) { 39 | return Optional.of(subtitleUrl); 40 | } 41 | 42 | return Optional.empty(); 43 | } 44 | 45 | public Map getVideoUrls() { 46 | return videoUrls; 47 | } 48 | 49 | public boolean containsQualities(final Qualities key) { 50 | return videoUrls.containsKey(key); 51 | } 52 | 53 | public String put(final Qualities key, final String value) { 54 | return videoUrls.put(key, value); 55 | } 56 | 57 | public void setSubtitleUrl(final String subtitleUrl) { 58 | this.subtitleUrl = subtitleUrl; 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/tasks/ArdDayPageTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard.tasks; 2 | 3 | import com.google.gson.reflect.TypeToken; 4 | import de.mediathekview.mlib.Config; 5 | import java.lang.reflect.Type; 6 | import java.util.Set; 7 | import java.util.concurrent.ConcurrentLinkedQueue; 8 | import jakarta.ws.rs.client.WebTarget; 9 | import mServer.crawler.sender.MediathekReader; 10 | import mServer.crawler.sender.ard.ArdFilmInfoDto; 11 | import mServer.crawler.sender.ard.json.ArdDayPageDeserializer; 12 | import mServer.crawler.sender.base.CrawlerUrlDTO; 13 | import 
mServer.crawler.sender.base.AbstractRecursivConverterTask; 14 | 15 | public class ArdDayPageTask extends ArdTaskBase { 16 | 17 | private static final Type SET_FILMINFO_TYPE_TOKEN = new TypeToken>() { 18 | }.getType(); 19 | 20 | public ArdDayPageTask(MediathekReader aCrawler, 21 | ConcurrentLinkedQueue aUrlToCrawlDTOs) { 22 | super(aCrawler, aUrlToCrawlDTOs); 23 | 24 | registerJsonDeserializer(SET_FILMINFO_TYPE_TOKEN, new ArdDayPageDeserializer()); 25 | } 26 | 27 | @Override 28 | protected void processRestTarget(CrawlerUrlDTO aDTO, WebTarget aTarget) { 29 | if (Config.getStop()) { 30 | return; 31 | } 32 | 33 | Set filmUrls = deserialize(aTarget, SET_FILMINFO_TYPE_TOKEN); 34 | 35 | if (filmUrls != null && !filmUrls.isEmpty()) { 36 | taskResults.addAll(filmUrls); 37 | } 38 | } 39 | 40 | @Override 41 | protected AbstractRecursivConverterTask createNewOwnInstance(ConcurrentLinkedQueue aElementsToProcess) { 42 | return new ArdDayPageTask(crawler, aElementsToProcess); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.ard.tasks; 2 | 3 | import com.google.gson.reflect.TypeToken; 4 | import jakarta.ws.rs.client.WebTarget; 5 | import mServer.crawler.sender.MediathekReader; 6 | import mServer.crawler.sender.ard.json.ArdTopicsDeserializer; 7 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 8 | import mServer.crawler.sender.base.CrawlerUrlDTO; 9 | import org.apache.logging.log4j.LogManager; 10 | import org.apache.logging.log4j.Logger; 11 | 12 | import java.lang.reflect.Type; 13 | import java.util.Set; 14 | import java.util.concurrent.ConcurrentLinkedQueue; 15 | 16 | public class ArdTopicsTask extends ArdTaskBase { 17 | private static final Logger LOG = LogManager.getLogger(ArdTopicsTask.class); 18 | private static final Type 
SET_CRAWLER_URL_TYPE_TOKEN = new TypeToken>() { 19 | }.getType(); 20 | private final String sender; 21 | 22 | public ArdTopicsTask(MediathekReader aCrawler, String sender, ConcurrentLinkedQueue urlToCrawlDTOs) { 23 | super(aCrawler, urlToCrawlDTOs); 24 | this.sender = sender; 25 | registerJsonDeserializer(SET_CRAWLER_URL_TYPE_TOKEN, new ArdTopicsDeserializer(sender)); 26 | } 27 | 28 | @Override 29 | protected AbstractRecursivConverterTask createNewOwnInstance( 30 | ConcurrentLinkedQueue aElementsToProcess) { 31 | return new ArdTopicsTask(this.crawler, sender, aElementsToProcess); 32 | } 33 | 34 | @Override 35 | protected void processRestTarget(CrawlerUrlDTO aDTO, WebTarget aTarget) { 36 | final Set results = deserialize(aTarget, SET_CRAWLER_URL_TYPE_TOKEN); 37 | LOG.debug("Found {} topics for {}.", results.size(), sender); 38 | taskResults.addAll(results); 39 | 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | import com.google.gson.*; 4 | import de.mediathekview.mlib.tool.Log; 5 | 6 | import java.lang.reflect.Type; 7 | import java.util.Optional; 8 | 9 | /** 10 | * Deserialisiert Ergebnisse der Anfrage den Filmen einer Kategorie. 
/**
 * Result of reading one category page: the program ids and collection ids found on it, plus the
 * URL of the next page if there is one.
 */
public class ArteCategoryFilmsDTO {

  private final Set<String> programIds = new HashSet<>();
  private final Set<String> collectionIds = new HashSet<>();

  private String nextPageUrl;

  /** Adds a program id; duplicates are collapsed by the set. */
  public void addProgramId(String aProgramId) {
    programIds.add(aProgramId);
  }

  /** Adds a collection id; duplicates are collapsed by the set. */
  public void addCollection(String aCollectionId) {
    collectionIds.add(aCollectionId);
  }

  /**
   * @return the internal, mutable set of program ids (not a copy)
   */
  public Set<String> getProgramIds() {
    return programIds;
  }

  /**
   * @return the internal, mutable set of collection ids (not a copy)
   */
  public Set<String> getCollectionIds() {
    return collectionIds;
  }

  /**
   * @return {@code true} if a non-empty next page URL has been set
   */
  public boolean hasNextPage() {
    return nextPageUrl != null && !nextPageUrl.isEmpty();
  }

  /**
   * @return the next page URL as set, or {@code null} if none was set
   */
  public String getNextPageUrl() {
    return nextPageUrl;
  }

  public void setNextPageUrl(String url) {
    nextPageUrl = url;
  }
}
programObject = program.getAsJsonObject(); 27 | if (JsonUtils.checkTreePath(programObject, ELEMENT_VIDEOS)) { 28 | programObject 29 | .get(ELEMENT_VIDEOS) 30 | .getAsJsonArray() 31 | .forEach( 32 | filmElement -> 33 | JsonUtils.getAttributeAsString(filmElement.getAsJsonObject(), ATTRIBUTE_PROGRAM_ID) 34 | .ifPresent(result::addProgramId)); 35 | } 36 | }); 37 | } 38 | } 39 | return result; 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/arte/ArteCollectionParentDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | 4 | import com.google.gson.*; 5 | import mServer.crawler.sender.base.JsonUtils; 6 | 7 | import java.lang.reflect.Type; 8 | import java.util.Optional; 9 | 10 | public class ArteCollectionParentDeserializer implements JsonDeserializer { 11 | private static final String ATTRIBUTE_KIND = "kind"; 12 | private static final String ATTRIBUTE_PROGRAM_ID = "programId"; 13 | private static final String ELEMENT_PROGRAMS = "programs"; 14 | private static final String ELEMENT_CHILDREN = "children"; 15 | 16 | public ArteCategoryFilmsDTO deserialize(final JsonElement aJsonElement, final Type aType, final JsonDeserializationContext aJsonDeserializationContext) throws JsonParseException { 17 | final ArteCategoryFilmsDTO result = new ArteCategoryFilmsDTO(); 18 | if (aJsonElement.isJsonObject()) { 19 | final JsonObject mainObj = aJsonElement.getAsJsonObject(); 20 | 21 | if (JsonUtils.checkTreePath(mainObj, ELEMENT_PROGRAMS)) { 22 | final JsonArray programs = mainObj.get(ELEMENT_PROGRAMS).getAsJsonArray(); 23 | programs.forEach(program -> { 24 | final JsonObject programObject = program.getAsJsonObject(); 25 | if (JsonUtils.checkTreePath(programObject, ELEMENT_CHILDREN)) { 26 | programObject.get(ELEMENT_CHILDREN).getAsJsonArray().forEach(filmElement -> { 27 | final JsonObject filmObject = 
filmElement.getAsJsonObject(); 28 | final Optional kind = JsonUtils.getAttributeAsString(filmObject, ATTRIBUTE_KIND); 29 | final Optional programId = JsonUtils.getAttributeAsString(filmObject, ATTRIBUTE_PROGRAM_ID); 30 | 31 | if (kind.isPresent() && kind.get().equalsIgnoreCase("TV_SERIES") && programId.isPresent()) { 32 | result.addCollection(programId.get()); 33 | } 34 | }); 35 | } 36 | }); 37 | } 38 | } 39 | return result; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/arte/ArteDatenFilmDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | import java.lang.reflect.Type; 4 | import java.util.ArrayList; 5 | import java.util.Collection; 6 | import java.util.List; 7 | import java.util.Objects; 8 | import java.util.stream.Collectors; 9 | 10 | import com.google.gson.JsonDeserializationContext; 11 | import com.google.gson.JsonDeserializer; 12 | import com.google.gson.JsonElement; 13 | import com.google.gson.JsonParseException; 14 | 15 | import de.mediathekview.mlib.daten.DatenFilm; 16 | import de.mediathekview.mlib.daten.ListeFilme; 17 | import java.util.Set; 18 | 19 | public class ArteDatenFilmDeserializer implements JsonDeserializer { 20 | 21 | private static final String JSON_ELEMENT_VIDEOS = "videos"; 22 | 23 | private final String langCode; 24 | private final String senderName; 25 | 26 | public ArteDatenFilmDeserializer(String aLangCode, String aSenderName) { 27 | langCode = aLangCode; 28 | senderName = aSenderName; 29 | } 30 | 31 | @Override 32 | public ListeFilme deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException { 33 | ListeFilme listeFilme = new ListeFilme(); 34 | 35 | Collection futureFilme = new ArrayList<>(); 36 | for (JsonElement jsonElement : 
aJsonElement.getAsJsonObject().get(JSON_ELEMENT_VIDEOS).getAsJsonArray()) { 37 | Set films = new ArteJsonObjectToDatenFilmCallable(jsonElement.getAsJsonObject(), langCode, senderName).call(); 38 | for (DatenFilm film : films) { 39 | futureFilme.add(film); 40 | } 41 | } 42 | 43 | final List list = futureFilme.parallelStream() 44 | .filter(Objects::nonNull) 45 | .collect(Collectors.toList()); 46 | listeFilme.addAll(list); 47 | list.clear(); 48 | 49 | return listeFilme; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/arte/ArteJsonObjectToDatenFilmCallable.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | import java.util.concurrent.Callable; 4 | 5 | import de.mediathekview.mlib.tool.Log; 6 | import org.apache.logging.log4j.LogManager; 7 | import org.apache.logging.log4j.Logger; 8 | 9 | import com.google.gson.JsonObject; 10 | 11 | import de.mediathekview.mlib.daten.DatenFilm; 12 | import java.util.HashSet; 13 | import java.util.Set; 14 | 15 | public class ArteJsonObjectToDatenFilmCallable implements Callable> { 16 | 17 | private static final Logger LOG = LogManager.getLogger(ArteJsonObjectToDatenFilmCallable.class); 18 | 19 | private static final String JSON_ELEMENT_KEY_PROGRAM_ID = "programId"; 20 | 21 | private final JsonObject jsonObject; 22 | private final String langCode; 23 | private final String senderName; 24 | 25 | public ArteJsonObjectToDatenFilmCallable(JsonObject aJsonObjec, String aLangCode, String aSenderName) { 26 | jsonObject = aJsonObjec; 27 | langCode = aLangCode; 28 | senderName = aSenderName; 29 | } 30 | 31 | @Override 32 | public Set call() { 33 | Set films = new HashSet<>(); 34 | try { 35 | if (isValidProgramObject(jsonObject)) { 36 | String programId = getElementValue(jsonObject, JSON_ELEMENT_KEY_PROGRAM_ID); 37 | films = new ArteProgramIdToDatenFilmCallable(programId, 
langCode, senderName).call(); 38 | } 39 | } catch (Exception e) { 40 | e.printStackTrace(); 41 | LOG.error(e); 42 | Log.errorLog(348833773, e); 43 | } 44 | 45 | return films; 46 | } 47 | 48 | private static String getElementValue(JsonObject jsonObject, String elementName) { 49 | return !jsonObject.get(elementName).isJsonNull() ? jsonObject.get(elementName).getAsString() : ""; 50 | } 51 | 52 | private static boolean isValidProgramObject(JsonObject programObject) { 53 | return programObject.has(JSON_ELEMENT_KEY_PROGRAM_ID) 54 | && !programObject.get(JSON_ELEMENT_KEY_PROGRAM_ID).isJsonNull(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/arte/ArteSubPageDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | import com.google.gson.*; 4 | 5 | import java.lang.reflect.Type; 6 | import java.util.Optional; 7 | 8 | public class ArteSubPageDeserializer extends ArteListBaseDeserializer implements JsonDeserializer { 9 | private static final String JSON_ELEMENT_VALUE = "value"; 10 | 11 | @Override 12 | public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException { 13 | final ArteCategoryFilmsDTO dto = new ArteCategoryFilmsDTO(); 14 | 15 | JsonElement rootElement = aJsonElement; 16 | if (aJsonElement.getAsJsonObject().has(JSON_ELEMENT_VALUE)) { 17 | rootElement = aJsonElement.getAsJsonObject().get(JSON_ELEMENT_VALUE); 18 | } 19 | 20 | JsonObject rootObject = rootElement.getAsJsonObject(); 21 | extractProgramIdFromData(rootObject, dto); 22 | 23 | Optional url = parsePagination(rootObject); 24 | url.ifPresent(dto::setNextPageUrl); 25 | 26 | return dto; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/arte/ArteVideoDTO.java: 
-------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | import mServer.crawler.sender.base.Qualities; 4 | 5 | import java.time.Duration; 6 | import java.util.EnumMap; 7 | import java.util.Map; 8 | 9 | public class ArteVideoDTO { 10 | 11 | private Duration duration; 12 | private final Map videoUrls; 13 | private final Map videoUrlsWithSubtitle; 14 | private final Map videoUrlsWithAudioDescription; 15 | private final Map videoUrlsOriginal; 16 | private final Map videoUrlsOriginalWithSubtitle; 17 | 18 | public ArteVideoDTO() { 19 | videoUrls = new EnumMap<>(Qualities.class); 20 | videoUrlsWithSubtitle = new EnumMap<>(Qualities.class); 21 | videoUrlsWithAudioDescription = new EnumMap<>(Qualities.class); 22 | videoUrlsOriginal = new EnumMap<>(Qualities.class); 23 | videoUrlsOriginalWithSubtitle = new EnumMap<>(Qualities.class); 24 | duration = Duration.ZERO; 25 | } 26 | 27 | public void addVideo(Qualities aQualities, String aUrl) { 28 | videoUrls.put(aQualities, aUrl); 29 | } 30 | 31 | public void addVideoWithAudioDescription(Qualities qualities, String url) { 32 | videoUrlsWithAudioDescription.put(qualities, url); 33 | } 34 | 35 | public void addVideoWithSubtitle(Qualities qualities, String url) { 36 | videoUrlsWithSubtitle.put(qualities, url); 37 | } 38 | public void addVideoOriginal(Qualities qualities, String url) { 39 | videoUrlsOriginal.put(qualities, url); 40 | } 41 | public void addVideoOriginalWithSubtitle(Qualities qualities, String url) { 42 | videoUrlsOriginalWithSubtitle.put(qualities, url); 43 | } 44 | 45 | public Duration getDuration() { return duration; } 46 | 47 | public Map getVideoUrls() { 48 | return videoUrls; 49 | } 50 | 51 | public Map getVideoUrlsWithAudioDescription() { 52 | return videoUrlsWithAudioDescription; 53 | } 54 | 55 | public Map getVideoUrlsWithSubtitle() { 56 | return videoUrlsWithSubtitle; 57 | } 58 | 59 | public Map getVideoUrlsOriginal() { 60 | return 
videoUrlsOriginal; 61 | } 62 | 63 | public Map getVideoUrlsOriginalWithSubtitle() { 64 | return videoUrlsOriginalWithSubtitle; 65 | } 66 | 67 | public void setDuration(Duration duration) { 68 | this.duration = duration; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/arte/ArteVideoDetailsDTO.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | import mServer.crawler.sender.base.GeoLocations; 4 | 5 | import java.time.Duration; 6 | 7 | public class ArteVideoDetailsDTO { 8 | private String broadcastBegin = ""; 9 | private GeoLocations geo = GeoLocations.GEO_NONE; 10 | 11 | private String title = ""; 12 | private String theme = ""; 13 | private String description = ""; 14 | private String website = ""; 15 | private Duration duration = Duration.ZERO; 16 | 17 | public String getBroadcastBegin() { 18 | return this.broadcastBegin; 19 | } 20 | 21 | public void setBroadcastBegin(String aBroadcastBegin) { 22 | this.broadcastBegin = aBroadcastBegin; 23 | } 24 | 25 | public GeoLocations getGeoLocation() { 26 | return this.geo; 27 | } 28 | 29 | public void setGeoLocation(GeoLocations aGeo) { 30 | this.geo = aGeo; 31 | } 32 | 33 | public String getTitle() { 34 | return title; 35 | } 36 | public void setTitle(String aTitle) { 37 | title = aTitle; 38 | } 39 | public void setTheme(String aTheme) { 40 | theme = aTheme; 41 | } 42 | public void setDescription(String aDescription) { 43 | description = aDescription; 44 | } 45 | public void setWebsite(String aWebsite) { 46 | website = aWebsite; 47 | } 48 | public String getTheme() { 49 | return theme; 50 | } 51 | public String getDescription() { 52 | return description; 53 | } 54 | public String getWebsite() { 55 | return website; 56 | } 57 | 58 | public void setDuration(Duration duration) { this.duration = duration; } 59 | public Duration getDuration() { return 
duration; } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/arte/ArteVideoType.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | public enum ArteVideoType { 4 | DEFAULT, 5 | SUBTITLE_INCLUDED, 6 | ORIGINAL_WITH_SUBTITLE, ORIGINAL, AUDIO_DESCRIPTION 7 | } -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/AbstractUrlTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | import com.google.gson.JsonObject; 4 | import java.util.concurrent.ConcurrentLinkedQueue; 5 | import java.util.concurrent.RecursiveTask; 6 | import mServer.crawler.FilmeSuchen; 7 | import mServer.crawler.RunSender; 8 | import mServer.crawler.sender.MediathekReader; 9 | 10 | /** 11 | * This task is based on {@link RecursiveTask} and takes a 12 | * {@link ConcurrentLinkedQueue} of {@link D}. It splits the URLs on instances 13 | * of it self based on the crawler configuration and calls the 14 | * {@link this#processUrl(CrawlerUrlDTO)} for each. 15 | * 16 | * @author Nicklas Wiegandt (Nicklas2751)
17 | * Mail: nicklas@wiegandt.eu
18 | * Jabber: nicklas2751@elaon.de
19 | * Riot.im: nicklas2751:matrix.elaon.de
20 | * 21 | * @param The type of objects which will be created from this task. 22 | * @param A sub type of {@link CrawlerUrlDTO} which this task will use to 23 | * create the result objects. 24 | */ 25 | public abstract class AbstractUrlTask 26 | extends AbstractRecursivConverterTask { 27 | 28 | private static final long serialVersionUID = -4077156510484515410L; 29 | 30 | public AbstractUrlTask(final MediathekReader aCrawler, 31 | final ConcurrentLinkedQueue aUrlToCrawlDTOs) { 32 | super(aCrawler, aUrlToCrawlDTOs); 33 | } 34 | 35 | @Override 36 | protected Integer getMaxElementsToProcess() { 37 | return 100; 38 | } 39 | 40 | @Deprecated 41 | protected void processElement(final D aDTO) { 42 | processElement(aDTO); 43 | } 44 | 45 | protected void traceRequest() { 46 | increment(RunSender.Count.ANZAHL); 47 | } 48 | 49 | protected void traceRequest(long responseLength) { 50 | traceRequest(); 51 | if (responseLength > 0) { 52 | increment(RunSender.Count.SUM_DATA_BYTE, responseLength); 53 | increment(RunSender.Count.SUM_TRAFFIC_BYTE, responseLength); 54 | } 55 | } 56 | 57 | private void increment(final RunSender.Count count) { 58 | FilmeSuchen.listeSenderLaufen.inc(this.crawler.getSendername(), count); 59 | } 60 | 61 | private void increment(final RunSender.Count count, final long value) { 62 | FilmeSuchen.listeSenderLaufen.inc(this.crawler.getSendername(), count, value); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/CrawlerUrlDTO.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | import java.net.URL; 4 | 5 | public class CrawlerUrlDTO { 6 | 7 | protected static final String HTTPS = "https:"; 8 | private String url; 9 | 10 | public CrawlerUrlDTO(final String aUrl) { 11 | setUrl(aUrl); 12 | } 13 | 14 | public CrawlerUrlDTO(final URL aUrl) { 15 | this(aUrl.toString()); 16 | } 17 | 18 | 
/**
 * Helpers for date/time strings delivered by the sender APIs.
 */
public class DateUtils {

  private static final String SPLITTED_NUMBERS_REGEX_PATTERN = "$1:$2";
  // The first group is possessive ("{1,2}+"): once it has taken two digits it
  // must not give one back. Without that, an offset that already is valid
  // ("+01:00" or "+01") was split into "+0:1...".
  private static final String SPLIT_NUMBERS_REGEX_PATTERN = "(\\+\\d{1,2}+)(\\d{1,2})";
  // Compiled once instead of on every call.
  private static final java.util.regex.Pattern SPLIT_NUMBERS =
      java.util.regex.Pattern.compile(SPLIT_NUMBERS_REGEX_PATTERN);

  private DateUtils() {
    super();
  }

  /**
   * Inserts the colon into a positive zone offset written without one, e.g.
   * {@code +0100} becomes {@code +01:00}. Java 8 lacks full ISO 8601 support,
   * see
   * https://stackoverflow.com/questions/2201925/converting-iso-8601-compliant-string-to-java-util-date
   *
   * <p>Offsets that already contain a colon, hour-only offsets and strings
   * without a {@code +} are returned unchanged. Negative offsets are not
   * touched.
   *
   * @param aDateTimeString the date/time text to adjust; must not be null
   * @return the text with the colon inserted into the offset
   */
  public static String changeDateTimeForMissingISO8601Support(final String aDateTimeString) {
    return SPLIT_NUMBERS.matcher(aDateTimeString).replaceAll(SPLITTED_NUMBERS_REGEX_PATTERN);
  }
}
65 | @Override 66 | public String toString() { 67 | return "FilmUrlInfoDto{" 68 | + "url='" 69 | + url 70 | + '\'' 71 | + ", height=" 72 | + height 73 | + ", width=" 74 | + width 75 | + ", fileType=" 76 | + fileType 77 | + '}'; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/GeoLocations.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | 5 | import java.util.Optional; 6 | 7 | /** 8 | * The available GEO locations. 9 | */ 10 | public enum GeoLocations 11 | { 12 | GEO_NONE("", "WELT", "none", "world", "ALL", ""), // nur in .. zu sehen 13 | GEO_DE("DE"), 14 | GEO_AT("AT"), 15 | GEO_CH("CH"), 16 | GEO_EU("EU"), 17 | GEO_DE_FR("DE-FR", "DE_FR"), 18 | GEO_DE_AT_CH("DE-AT-CH", "dach"), 19 | GEO_DE_AT_CH_EU("DE-AT-CH-EU", "SAT", "EBU"), 20 | GEO_DE_AT_CH_FR("DE-AT-CH-FR", "EUR_DE_FR"); 21 | 22 | private final String description; 23 | private final String[] alternatives; 24 | 25 | GeoLocations(final String aDescription, final String... aAlternatives) { 26 | description = aDescription; 27 | alternatives = aAlternatives; 28 | } 29 | 30 | /** 31 | * Finds a GeoLocation based on its description and alternatives. 32 | * 33 | * @param aTerm A term like the description or one of the alternatives. 34 | * @return The GeoLocation if found or else an empty Optional. 
35 | */ 36 | public static Optional find(final String aTerm) { 37 | for (final GeoLocations geoLoc : GeoLocations.values()) { 38 | if (geoLoc.getDescription().equalsIgnoreCase(aTerm) 39 | || StringUtils.equalsAnyIgnoreCase(aTerm, geoLoc.alternatives)) { 40 | return Optional.of(geoLoc); 41 | } 42 | } 43 | return Optional.empty(); 44 | } 45 | 46 | public String getDescription() 47 | { 48 | return description; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/JsoupConnection.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | import org.jsoup.Connection; 4 | import org.jsoup.Jsoup; 5 | import org.jsoup.nodes.Document; 6 | import org.jsoup.parser.Parser; 7 | 8 | import java.io.IOException; 9 | 10 | /** 11 | * Helper Class to get rid of static method call for better testability 12 | */ 13 | public class JsoupConnection { 14 | 15 | public Connection getConnection(String url) { 16 | return Jsoup.connect(url); 17 | } 18 | 19 | public Document getDocument(String url) throws IOException { 20 | return getConnection(url).get(); 21 | } 22 | 23 | public Document getDocumentTimeoutAfter(String url, int timeoutInMilliseconds) throws IOException { 24 | return getConnection(url).timeout(timeoutInMilliseconds).get(); 25 | } 26 | 27 | public Document getDocumentTimeoutAfterAlternativeDocumentType(String url, int timeoutInMilliseconds, Parser parser) throws IOException { 28 | return getConnection(url).timeout(timeoutInMilliseconds).parser(parser).get(); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/M3U8Constants.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | public final class M3U8Constants { 4 | 5 | private M3U8Constants() 
{ 6 | } 7 | 8 | public static final String M3U8_BANDWIDTH = "BANDWIDTH"; 9 | public static final String M3U8_CLOSED_CAPTIONS = "CLOSED-CAPTIONS"; 10 | public static final String M3U8_CODECS = "CODECS"; 11 | public static final String M3U8_PROGRAM_ID = "PROGRAM-ID"; 12 | public static final String M3U8_RESOLUTION = "RESOLUTION"; 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/PagedElementListDTO.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | import java.util.Collection; 4 | import java.util.HashSet; 5 | import java.util.Optional; 6 | import java.util.Set; 7 | 8 | /** 9 | * A simple Data-Transfer-Object to get the elements and the optionally found 10 | * next page link. 11 | * 12 | * @author Nicklas Wiegandt (Nicklas2751)
13 | * Mail: nicklas@wiegandt.eu
14 | * Jabber: nicklas2751@elaon.de
15 | * Riot.im: nicklas2751:matrix.elaon.de
16 | */ 17 | public class PagedElementListDTO { 18 | 19 | private final Set elements; 20 | private Optional nextPage; 21 | 22 | public PagedElementListDTO() { 23 | super(); 24 | elements = new HashSet<>(); 25 | nextPage = Optional.empty(); 26 | } 27 | 28 | public boolean addElements(final Collection elements) { 29 | return this.elements.addAll(elements); 30 | } 31 | 32 | /* 33 | * (non-Javadoc) 34 | * 35 | * @see java.lang.Object#equals(java.lang.Object) 36 | */ 37 | @Override 38 | public boolean equals(final Object obj) { 39 | if (this == obj) { 40 | return true; 41 | } 42 | if (obj == null) { 43 | return false; 44 | } 45 | if (!(obj instanceof PagedElementListDTO)) { 46 | return false; 47 | } 48 | final PagedElementListDTO other = (PagedElementListDTO) obj; 49 | if (elements == null) { 50 | if (other.elements != null) { 51 | return false; 52 | } 53 | } else if (!elements.equals(other.elements)) { 54 | return false; 55 | } 56 | if (nextPage == null) { 57 | return other.nextPage == null; 58 | } else { 59 | return nextPage.equals(other.nextPage); 60 | } 61 | } 62 | 63 | public Set getElements() { 64 | return elements; 65 | } 66 | 67 | public Optional getNextPage() { 68 | return nextPage; 69 | } 70 | 71 | public void setNextPage(final Optional aNextPage) { 72 | nextPage = aNextPage; 73 | } 74 | 75 | /* 76 | * (non-Javadoc) 77 | * 78 | * @see java.lang.Object#hashCode() 79 | */ 80 | @Override 81 | public int hashCode() { 82 | final int prime = 31; 83 | int result = 1; 84 | result = prime * result + (elements == null ? 0 : elements.hashCode()); 85 | result = prime * result + (nextPage == null ? 
0 : nextPage.hashCode()); 86 | return result; 87 | } 88 | 89 | @Override 90 | public String toString() { 91 | return "PagedElementListDTO{" + "elements=" + elements + ", nextPage=" + nextPage + '}'; 92 | } 93 | 94 | public boolean addElement(final T element) { 95 | return elements.add(element); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/Qualities.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | public enum Qualities 4 | { 5 | HD("HD"), NORMAL("Normal"), SMALL("Klein"), UHD("UHD"); 6 | 7 | private final String description; 8 | 9 | Qualities(String aDescription) 10 | { 11 | description = aDescription; 12 | } 13 | 14 | public String getDescription() 15 | { 16 | return description; 17 | } 18 | 19 | public static Qualities getResolutionFromWidth(final int width) { 20 | if (width > 1280) { 21 | return Qualities.HD; 22 | } else if (width > 640) { 23 | return Qualities.NORMAL; 24 | } else { 25 | return Qualities.SMALL; 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/SendungOverviewDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Optional; 6 | 7 | public class SendungOverviewDto { 8 | 9 | private final List urls; 10 | private Optional nextPageId; 11 | 12 | public SendungOverviewDto() { 13 | urls = new ArrayList<>(); 14 | nextPageId = Optional.empty(); 15 | } 16 | 17 | public boolean addUrl(final String aUrl) { 18 | return urls.add(new CrawlerUrlDTO(aUrl)); 19 | } 20 | 21 | public Optional getNextPageId() { 22 | return nextPageId; 23 | } 24 | 25 | public List getUrls() { 26 | return urls; 27 | } 28 | 29 | public void 
setNextPageId(final Optional aNextPageId) { 30 | nextPageId = aNextPageId; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/TopicUrlDTO.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | import java.util.Objects; 4 | 5 | public class TopicUrlDTO extends CrawlerUrlDTO { 6 | 7 | private final String topic; 8 | 9 | public TopicUrlDTO(String aTopic, String aUrl) { 10 | super(aUrl); 11 | topic = aTopic; 12 | } 13 | 14 | public String getTopic() { 15 | return topic; 16 | } 17 | 18 | @Override 19 | public boolean equals(final Object obj) { 20 | if (obj == null || getClass() != obj.getClass()) { 21 | return false; 22 | } 23 | if (super.equals(obj)) { 24 | return getTopic().equals(((TopicUrlDTO) obj).getTopic()); 25 | } 26 | 27 | return false; 28 | } 29 | 30 | @Override 31 | public int hashCode() { 32 | int hash = 3; 33 | hash = 31 * hash + Objects.hashCode(this.topic) + super.hashCode(); 34 | return hash; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/base/UrlParseException.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.base; 2 | 3 | public class UrlParseException extends Exception { 4 | 5 | public UrlParseException(final String aMessage) { 6 | super(aMessage); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/dreisat/DreisatConstants.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.dreisat; 2 | 3 | public final class DreisatConstants { 4 | 5 | /** 6 | * Base url of the 3Sat website. 
7 | */ 8 | public static final String URL_BASE = "https://www.3sat.de"; 9 | /** 10 | * Base url of the 3Sat api. 11 | */ 12 | public static final String URL_API_BASE = "https://api.3sat.de"; 13 | /** 14 | * Url to search the films. 15 | */ 16 | public static final String URL_DAY 17 | = URL_API_BASE 18 | + "/search/documents?hasVideo=true&q=*&types=page-video&sortOrder=desc&from=%sT00:00:00.000%%2B01:00&to=%sT23:59:59.999%%2B01:00&sortBy=date&page=1"; 19 | 20 | public static final String URL_HTML_DAY = URL_BASE + "/programm?airtimeDate=%s"; 21 | 22 | private DreisatConstants() { 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/dreisat/DreisatDayPageHtmlDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.dreisat; 2 | 3 | import mServer.crawler.sender.base.CrawlerUrlDTO; 4 | import mServer.crawler.sender.base.UrlUtils; 5 | import mServer.crawler.sender.zdf.ZdfConstants; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.select.Elements; 8 | 9 | import java.util.HashSet; 10 | import java.util.Optional; 11 | import java.util.Set; 12 | 13 | public class DreisatDayPageHtmlDeserializer { 14 | 15 | private static final String ATTRIBUTE_HREF = "href"; 16 | private static final String LINK_SELECTOR = "article a"; 17 | private final String urlApiBase; 18 | 19 | public DreisatDayPageHtmlDeserializer(final String urlApiBase) { 20 | this.urlApiBase = urlApiBase; 21 | } 22 | 23 | public Set deserialize(final Document document) { 24 | final Set results = new HashSet<>(); 25 | 26 | Elements filmUrls = document.select(LINK_SELECTOR); 27 | filmUrls.forEach( 28 | filmUrlElement -> { 29 | final Optional url = 30 | buildFilmUrlJsonFromHtmlLink(filmUrlElement.attr(ATTRIBUTE_HREF)); 31 | url.ifPresent(s -> results.add(new CrawlerUrlDTO(s))); 32 | }); 33 | 34 | return results; 35 | } 36 | 37 | private Optional 
buildFilmUrlJsonFromHtmlLink(String attr) { 38 | return UrlUtils.getFileName(attr) 39 | .map(s -> String.format(ZdfConstants.URL_FILM_JSON, urlApiBase, s.split("\\.")[0])); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/dreisat/DreisatDayPageHtmlTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.dreisat; 2 | 3 | import mServer.crawler.sender.MediathekReader; 4 | import mServer.crawler.sender.base.AbstractDocumentTask; 5 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 6 | import mServer.crawler.sender.base.CrawlerUrlDTO; 7 | import mServer.crawler.sender.base.JsoupConnection; 8 | import org.jsoup.nodes.Document; 9 | 10 | import java.util.concurrent.ConcurrentLinkedQueue; 11 | 12 | public class DreisatDayPageHtmlTask extends AbstractDocumentTask { 13 | 14 | private final transient DreisatDayPageHtmlDeserializer deserializer; 15 | private final String apiUrlBase; 16 | private final transient JsoupConnection jsoupConnection; 17 | 18 | public DreisatDayPageHtmlTask( 19 | final String apiUrlBase, 20 | final MediathekReader crawler, 21 | final ConcurrentLinkedQueue urlToCrawlDTOs, 22 | final JsoupConnection jsoupConnection) { 23 | super(crawler, urlToCrawlDTOs); 24 | this.apiUrlBase = apiUrlBase; 25 | deserializer = new DreisatDayPageHtmlDeserializer(apiUrlBase); 26 | this.jsoupConnection = jsoupConnection; 27 | } 28 | 29 | @Override 30 | protected void processDocument(CrawlerUrlDTO aUrlDTO, Document aDocument) { 31 | taskResults.addAll(deserializer.deserialize(aDocument)); 32 | } 33 | 34 | @Override 35 | protected AbstractRecursivConverterTask createNewOwnInstance( 36 | ConcurrentLinkedQueue aElementsToProcess) { 37 | return new DreisatDayPageHtmlTask(apiUrlBase, crawler, aElementsToProcess, jsoupConnection); 38 | } 39 | } 40 | 
/**
 * Url constants of the DW crawler.
 */
public class DwConstants {

  /** Base url of the DW api. */
  public static final String URL_BASE = "https://api.dw.com/api";

  /** Path of the media center overview list, requesting page index 1. */
  public static final String URL_OVERVIEW = "/list/mediacenter/1?pageIndex=1";

  private DwConstants() {
    // holder of constants only, never instantiated
  }
}
/**
 * Mutable holder for the details of a single DW film.
 */
public class DwFilmDetailDto {

  private String baseName;
  private String topic;
  private String title;
  private String description;
  private String website;

  public String getBaseName() {
    return this.baseName;
  }

  public void setBaseName(String baseName) {
    this.baseName = baseName;
  }

  public String getTopic() {
    return this.topic;
  }

  public void setTopic(String topic) {
    this.topic = topic;
  }

  public String getTitle() {
    return this.title;
  }

  public void setTitle(String title) {
    this.title = title;
  }

  public String getDescription() {
    return this.description;
  }

  public void setDescription(String description) {
    this.description = description;
  }

  /**
   * @return the website, or an empty optional when it is {@code null} or an empty string
   */
  public Optional<String> getWebsite() {
    return Optional.ofNullable(this.website).filter(value -> !value.isEmpty());
  }

  public void setWebsite(String website) {
    this.website = website;
  }
}
final Logger LOG = LogManager.getLogger(DwFilmDetailTask.class); 23 | private static final Type OPTIONAL_FILM_DETAIL_DTO_TYPE_TOKEN = 24 | new TypeToken>() {}.getType(); 25 | 26 | public DwFilmDetailTask( 27 | final MediathekReader aCrawler, 28 | final ConcurrentLinkedQueue aUrlToCrawlDTOs) { 29 | super(aCrawler, aUrlToCrawlDTOs, Optional.empty()); 30 | 31 | registerJsonDeserializer( 32 | OPTIONAL_FILM_DETAIL_DTO_TYPE_TOKEN, new DwFilmDetailDeserializer()); 33 | } 34 | 35 | @Override 36 | protected DwFilmDetailTask createNewOwnInstance( 37 | final ConcurrentLinkedQueue aElementsToProcess) { 38 | return new DwFilmDetailTask(crawler, aElementsToProcess); 39 | } 40 | 41 | @Override 42 | protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarget) { 43 | Optional filmDetailDtoOptional = Optional.empty(); 44 | try { 45 | filmDetailDtoOptional = deserializeOptional(aTarget, OPTIONAL_FILM_DETAIL_DTO_TYPE_TOKEN); 46 | } catch (Exception e) { 47 | LOG.error("error processing {} ", aDTO.getUrl(), e); 48 | } 49 | if (!filmDetailDtoOptional.isPresent()) { 50 | FilmeSuchen.listeSenderLaufen.inc(crawler.getSendername(), RunSender.Count.FEHLER); 51 | FilmeSuchen.listeSenderLaufen.inc(crawler.getSendername(), RunSender.Count.FEHLVERSUCHE); 52 | return; 53 | } 54 | this.taskResults.add(filmDetailDtoOptional.get()); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/ApiUrlBuilder.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.funk; 2 | 3 | import mServer.crawler.sender.base.CrawlerUrlDTO; 4 | 5 | import java.net.URL; 6 | import java.util.ArrayList; 7 | import java.util.Collections; 8 | import java.util.List; 9 | import java.util.Optional; 10 | import java.util.concurrent.ConcurrentLinkedQueue; 11 | 12 | public class ApiUrlBuilder { 13 | private final String urlTemplate; 14 | private final 
CrawlerUrlType baseUrlUrlType; 15 | private final List parameters; 16 | 17 | public ApiUrlBuilder( 18 | final CrawlerUrlType baseUrlUrlType, 19 | final String urlTemplate) { 20 | super(); 21 | parameters = new ArrayList<>(); 22 | this.baseUrlUrlType = baseUrlUrlType; 23 | this.urlTemplate = urlTemplate; 24 | } 25 | 26 | public ApiUrlBuilder withParameter(final String parameter) { 27 | parameters.add(parameter); 28 | return this; 29 | } 30 | 31 | public String asString() { 32 | final Optional apiUrl = baseUrlUrlType.getDefaultUrl(); 33 | if (apiUrl.isPresent()) { 34 | final List urlParameter = new ArrayList<>(); 35 | urlParameter.add(String.valueOf(apiUrl.get())); 36 | urlParameter.addAll(parameters); 37 | return String.format(urlTemplate, urlParameter.toArray()); 38 | } else { 39 | throw new IllegalStateException("The API base URL is empty!"); 40 | } 41 | } 42 | 43 | public CrawlerUrlDTO asCrawlerUrl() { 44 | return new CrawlerUrlDTO(asString()); 45 | } 46 | 47 | public ConcurrentLinkedQueue asQueue() { 48 | return new ConcurrentLinkedQueue<>(Collections.singletonList(asCrawlerUrl())); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/CrawlerUrlType.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.funk; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | 5 | import java.net.MalformedURLException; 6 | import java.net.URL; 7 | import java.util.Optional; 8 | 9 | public enum CrawlerUrlType { 10 | FUNK_WEBSITE("https://www.funk.net"), 11 | FUNK_API_URL("https://www.funk.net/api/v4.0/"), 12 | NEXX_CLOUD_API_URL("https://api.nexx.cloud/v3/741"); 13 | 14 | private URL defaultUrl; 15 | 16 | CrawlerUrlType(final String urlText) { 17 | try { 18 | if (StringUtils.isNotEmpty(urlText)) { 19 | defaultUrl = new URL(urlText); 20 | } 21 | } catch (final MalformedURLException e) { 22 | defaultUrl = null; 23 | } 24 | 
} 25 | 26 | public Optional getDefaultUrl() { 27 | return Optional.ofNullable(defaultUrl); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/FunkApiUrls.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.funk; 2 | 3 | import mServer.crawler.sender.base.CrawlerUrlDTO; 4 | import org.jetbrains.annotations.NotNull; 5 | 6 | import javax.annotation.Nullable; 7 | import java.util.Optional; 8 | import java.util.concurrent.ConcurrentLinkedQueue; 9 | 10 | public enum FunkApiUrls { 11 | /** 12 | * The channels overview url. No channel id needed. 13 | */ 14 | CHANNELS("%s/channels/?size=%s"), 15 | /** 16 | * The video overview url. No channel id needed. 17 | */ 18 | VIDEOS("%s/videos/?size=%s"), 19 | /** 20 | * The videos for a specific channel. Channel id needed. 21 | */ 22 | VIDEOS_BY_CHANNEL("%s/videos/byChannelId/%s?size=%s"); 23 | 24 | private static final String MAX_URLS_PER_TASK = "99"; 25 | private final String urlTemplate; 26 | 27 | FunkApiUrls(final String aUrlTemplate) { 28 | urlTemplate = aUrlTemplate; 29 | } 30 | 31 | public CrawlerUrlDTO getAsCrawlerUrl( 32 | final @Nullable String channelId) { 33 | return buildUrl(channelId).asCrawlerUrl(); 34 | } 35 | 36 | @NotNull 37 | private ApiUrlBuilder buildUrl(final @Nullable String channelId) { 38 | final ApiUrlBuilder apiUrlBuilder = 39 | new ApiUrlBuilder(CrawlerUrlType.FUNK_API_URL, urlTemplate); 40 | Optional.ofNullable(channelId).ifPresent(apiUrlBuilder::withParameter); 41 | apiUrlBuilder.withParameter(MAX_URLS_PER_TASK); 42 | return apiUrlBuilder; 43 | } 44 | 45 | public ConcurrentLinkedQueue getAsQueue() { 46 | return getAsQueue(null); 47 | } 48 | 49 | public ConcurrentLinkedQueue getAsQueue( 50 | final @Nullable String channelId) { 51 | return buildUrl(channelId).asQueue(); 52 | } 53 | } 54 | 
-------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/FunkChannelDTO.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.funk; 2 | 3 | import java.util.Objects; 4 | 5 | /** Represents a Funk channel. */ 6 | public class FunkChannelDTO { 7 | private final String channelId; 8 | private final String channelTitle; 9 | 10 | /** 11 | * @param channelId The channel ID. 12 | * @param channelTitle The channel title. 13 | */ 14 | public FunkChannelDTO(final String channelId, final String channelTitle) { 15 | this.channelId = channelId; 16 | this.channelTitle = channelTitle; 17 | } 18 | 19 | public String getChannelId() { 20 | return channelId; 21 | } 22 | 23 | public String getChannelTitle() { 24 | return channelTitle; 25 | } 26 | 27 | @Override 28 | public boolean equals(final Object o) { 29 | if (this == o) { 30 | return true; 31 | } 32 | if (o == null || getClass() != o.getClass()) { 33 | return false; 34 | } 35 | final FunkChannelDTO that = (FunkChannelDTO) o; 36 | return Objects.equals(channelId, that.channelId) 37 | && Objects.equals(channelTitle, that.channelTitle); 38 | } 39 | 40 | @Override 41 | public int hashCode() { 42 | return Objects.hash(channelId, channelTitle); 43 | } 44 | 45 | @Override 46 | public String toString() { 47 | return "FunkChannelDTO{" 48 | + "channelId='" 49 | + channelId 50 | + '\'' 51 | + ", channelTitle='" 52 | + channelTitle 53 | + '\'' 54 | + '}'; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/FunkUrls.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.funk; 2 | 3 | import mServer.crawler.sender.base.CrawlerUrlDTO; 4 | import org.jetbrains.annotations.NotNull; 5 | 6 | import java.util.Queue; 7 | import java.util.stream.Stream; 8 | 9 | 
public enum FunkUrls { 10 | /** 11 | * [website]/channel/[channelAlias]/[alias] 12 | */ 13 | WEBSITE(CrawlerUrlType.FUNK_WEBSITE, "%s/channel/%s/%s"), 14 | /** 15 | * [ApiBaseUrl]/session/init 16 | */ 17 | NEXX_CLOUD_SESSION_INIT(CrawlerUrlType.NEXX_CLOUD_API_URL, "%s/session/init"), 18 | /** 19 | * [ApiBaseUrl]/videos/byid/[videoId] 20 | */ 21 | NEXX_CLOUD_VIDEO(CrawlerUrlType.NEXX_CLOUD_API_URL, "%s/videos/byid/%s"); 22 | 23 | private final String urlTemplate; 24 | private final CrawlerUrlType baseUrlUrlType; 25 | 26 | FunkUrls(final CrawlerUrlType aBaseUrlUrlType, final String aUrlTemplate) { 27 | baseUrlUrlType = aBaseUrlUrlType; 28 | urlTemplate = aUrlTemplate; 29 | } 30 | 31 | @NotNull 32 | private ApiUrlBuilder buildUrl(final String... parameters) { 33 | final ApiUrlBuilder apiUrlBuilder = new ApiUrlBuilder(baseUrlUrlType, urlTemplate); 34 | Stream.of(parameters).forEachOrdered(apiUrlBuilder::withParameter); 35 | return apiUrlBuilder; 36 | } 37 | 38 | public Queue getAsQueue(final String... parameters) { 39 | return buildUrl(parameters).asQueue(); 40 | } 41 | 42 | public String getAsString(final String... 
parameters) { 43 | return buildUrl(parameters).asString(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/json/FunkChannelDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.funk.json; 2 | 3 | import com.google.gson.JsonObject; 4 | import mServer.crawler.sender.funk.FunkChannelDTO; 5 | 6 | public class FunkChannelDeserializer extends AbstractFunkElementDeserializer { 7 | private static final String TAG_CHANNEL_DTO_LIST = "channelDTOList"; 8 | private static final String TAG_TITLE = "title"; 9 | private static final String TAG_ENTITY_ID = "entityId"; 10 | 11 | @Override 12 | protected FunkChannelDTO mapToElement(final JsonObject jsonObject) { 13 | return new FunkChannelDTO( 14 | jsonObject.get(TAG_ENTITY_ID).getAsString(), jsonObject.get(TAG_TITLE).getAsString()); 15 | } 16 | 17 | @Override 18 | protected String[] getRequiredTags() { 19 | return new String[]{TAG_TITLE, TAG_ENTITY_ID}; 20 | } 21 | 22 | @Override 23 | protected String getElementListTag() { 24 | return TAG_CHANNEL_DTO_LIST; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/json/NexxCloudSessionInitDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.funk.json; 2 | 3 | import com.google.gson.JsonDeserializationContext; 4 | import com.google.gson.JsonDeserializer; 5 | import com.google.gson.JsonElement; 6 | import com.google.gson.JsonParseException; 7 | import mServer.crawler.sender.base.JsonUtils; 8 | 9 | import java.lang.reflect.Type; 10 | 11 | public class NexxCloudSessionInitDeserializer implements JsonDeserializer { 12 | private static final String TAG_RESULT = "result"; 13 | private static final String TAG_GENERAL = "general"; 14 | private static 
final String ATTRIBUTE_CID = "cid"; 15 | 16 | @Override 17 | public Long deserialize( 18 | final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context) 19 | throws JsonParseException { 20 | if (JsonUtils.checkTreePath(jsonElement, TAG_RESULT, TAG_GENERAL, ATTRIBUTE_CID)) { 21 | return jsonElement 22 | .getAsJsonObject() 23 | .getAsJsonObject(TAG_RESULT) 24 | .getAsJsonObject(TAG_GENERAL) 25 | .get(ATTRIBUTE_CID) 26 | .getAsLong(); 27 | } 28 | 29 | return null; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/json/NexxResolutionDTO.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.funk.json; 2 | 3 | import java.util.Objects; 4 | import java.util.Optional; 5 | 6 | public class NexxResolutionDTO { 7 | private final int widht; 8 | private final int height; 9 | private final String size; 10 | private Optional fileId; 11 | 12 | public NexxResolutionDTO(final int widht, final int height, final String size, Optional aFileId) { 13 | this.widht = widht; 14 | this.height = height; 15 | this.size = size; 16 | this.fileId = aFileId; 17 | } 18 | 19 | public int getWidht() { 20 | return widht; 21 | } 22 | 23 | public int getHeight() { 24 | return height; 25 | } 26 | 27 | public String getSize() { 28 | return size; 29 | } 30 | 31 | public Optional getFileId() { 32 | return fileId; 33 | } 34 | 35 | @Override 36 | public boolean equals(final Object o) { 37 | if (this == o) { 38 | return true; 39 | } 40 | if (o == null || getClass() != o.getClass()) { 41 | return false; 42 | } 43 | final NexxResolutionDTO that = (NexxResolutionDTO) o; 44 | return widht == that.widht && height == that.height && Objects.equals(size, that.size); 45 | } 46 | 47 | @Override 48 | public int hashCode() { 49 | return Objects.hash(widht, height, size); 50 | } 51 | 52 | @Override 53 | public String toString() { 54 | 
return "NexxResolutionDTO{" + "widht=" + widht + ", height=" + height + ", size=" + size + '}'; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/tasks/FunkChannelsRestTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.funk.tasks; 2 | 3 | import mServer.crawler.sender.MediathekReader; 4 | import mServer.crawler.sender.base.CrawlerUrlDTO; 5 | import mServer.crawler.sender.funk.FunkChannelDTO; 6 | 7 | import java.util.concurrent.ConcurrentLinkedQueue; 8 | 9 | public class FunkChannelsRestTask extends FunkRestTask { 10 | public FunkChannelsRestTask( 11 | MediathekReader crawler, 12 | FunkRestEndpoint funkChannelDTOFunkRestEndpoint) { 13 | super(crawler, funkChannelDTOFunkRestEndpoint); 14 | } 15 | 16 | public FunkChannelsRestTask( 17 | MediathekReader crawler, 18 | FunkRestEndpoint funkChannelDTOFunkRestEndpoint, 19 | final ConcurrentLinkedQueue urlsToCrawl) { 20 | super(crawler, funkChannelDTOFunkRestEndpoint, urlsToCrawl); 21 | } 22 | 23 | private FunkChannelsRestTask( 24 | MediathekReader crawler, 25 | FunkRestEndpoint funkChannelDTOFunkRestEndpoint, 26 | final ConcurrentLinkedQueue urlsToCrawl, 27 | final int pageNumber) { 28 | super(crawler, funkChannelDTOFunkRestEndpoint, urlsToCrawl); 29 | } 30 | 31 | @Override 32 | protected Integer getMaximumSubpages() { 33 | // load all channels to fill channel list completely 34 | return Integer.MAX_VALUE; 35 | } 36 | 37 | @Override 38 | protected FunkChannelsRestTask createNewOwnInstance(final ConcurrentLinkedQueue aElementsToProcess, int pageNumber) { 39 | return new FunkChannelsRestTask(crawler, restEndpoint, aElementsToProcess, pageNumber); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/funk/tasks/FunkRestEndpoint.java: 
/**
 * Url constants of the KiKA API crawler.
 */
public final class KikaApiConstants {

  /** Base url of the KiKA player API; ends with a slash. */
  public static final String BASE_URL = "https://www.kika.de/api/v1/kikaplayer/kikaapp/";

  /** Url listing the videos, 400 per request, ordered by date descending. */
  public static final String ALL_VIDEOS =
      BASE_URL + "api/videos?limit=400&orderBy=date&orderDirection=DESC";

  /** Base url of the KiKA website. */
  public static final String WEBSITE = "https://www.kika.de";

  /**
   * Format template of the player assets url of a single video; the
   * placeholder takes the video id.
   *
   * <p>{@link #BASE_URL} already ends with a slash, so the path is appended
   * without a leading one, as in {@link #ALL_VIDEOS}.
   */
  public static final String FILM = BASE_URL + "api/videos/%s/player-assets";

  private KikaApiConstants() {}
}
{ 48 | final ConcurrentLinkedQueue overivew = new ConcurrentLinkedQueue<>(); 49 | overivew.add(new TopicUrlDTO("starting point", KikaApiConstants.ALL_VIDEOS)); 50 | // 51 | KikaApiTopicTask allvideosTask = new KikaApiTopicTask(this, overivew, 0, maxPages); 52 | final ConcurrentLinkedQueue allVideos = new ConcurrentLinkedQueue<>(); 53 | allVideos.addAll(forkJoinPool.invoke(allvideosTask)); 54 | Log.sysLog("KIKA: Anzahl sendungsfolgen urls: " + allVideos.size()); 55 | meldungAddMax(allVideos.size()); 56 | return new KikaApiFilmTask( 57 | this, new ConcurrentLinkedQueue<>(allVideos)); 58 | } catch (final Exception ex) { 59 | LOG.fatal("Exception in KIKA crawler.", ex); 60 | Log.errorLog(324978332, ex); 61 | } 62 | 63 | 64 | return null; 65 | 66 | 67 | } 68 | } -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/kika/KikaApiTopicDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.kika; 2 | 3 | import java.util.HashSet; 4 | import java.util.Optional; 5 | import java.util.Set; 6 | 7 | import mServer.crawler.sender.base.TopicUrlDTO; 8 | 9 | 10 | public class KikaApiTopicDto { 11 | private Optional errorMesssage = Optional.empty(); 12 | private Optional errorCode = Optional.empty(); 13 | private Optional nextUrl = Optional.empty(); 14 | private Set elements = new HashSet<>(); 15 | 16 | public void add(KikaApiFilmDto aKikaApiFilmDto) { 17 | elements.add(aKikaApiFilmDto); 18 | } 19 | 20 | public Set getElements() { 21 | return elements; 22 | } 23 | 24 | public Optional getNextPage() { 25 | return nextUrl; 26 | } 27 | 28 | public void setNextPage(TopicUrlDTO aCrawlerUrlDTO) { 29 | nextUrl = Optional.of(aCrawlerUrlDTO); 30 | } 31 | 32 | public void setError(Optional aErrorCode, Optional aErrorMesssage) { 33 | errorCode = aErrorCode; 34 | errorMesssage = aErrorMesssage; 35 | } 36 | 37 | public Optional getErrorMesssage() { 38 | return 
errorMesssage; 39 | } 40 | 41 | public Optional getErrorCode() { 42 | return errorCode; 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/kika/KikaApiVideoInfoDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.kika; 2 | 3 | import java.util.EnumMap; 4 | import java.util.HashSet; 5 | import java.util.Map; 6 | import java.util.Optional; 7 | import java.util.Set; 8 | 9 | public class KikaApiVideoInfoDto { 10 | // 11 | private Optional errorMesssage = Optional.empty(); 12 | private Optional errorCode = Optional.empty(); 13 | private Map videoUrls = new EnumMap<>(Resolution.class); 14 | private boolean hasSubtitle = false; 15 | private Set subtitles = new HashSet<>(); 16 | 17 | public void addUrl(Resolution aResolution, String aFilmUrl) { 18 | videoUrls.put(aResolution, aFilmUrl); 19 | } 20 | 21 | public Map getVideoUrls() { 22 | return videoUrls; 23 | } 24 | 25 | public void addSubtitle(String aUrl ) { 26 | subtitles.add(aUrl); 27 | } 28 | 29 | public Set getSubtitle() { 30 | return subtitles; 31 | } 32 | 33 | public void setError(Optional aErrorCode, Optional aErrorMesssage) { 34 | errorCode = aErrorCode; 35 | errorMesssage = aErrorMesssage; 36 | } 37 | 38 | public Optional getErrorMesssage() { 39 | return errorMesssage; 40 | } 41 | 42 | public Optional getErrorCode() { 43 | return errorCode; 44 | } 45 | 46 | public boolean hasSubtitle() { 47 | return hasSubtitle; 48 | } 49 | 50 | public void setSubtitle(boolean hasSub) { 51 | hasSubtitle = hasSub; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/OrfOnBreadCrumsUrlDTO.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon; 2 | 3 | 4 | import mServer.crawler.sender.base.CrawlerUrlDTO; 5 | 
6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import java.util.Objects; 9 | 10 | public class OrfOnBreadCrumsUrlDTO extends CrawlerUrlDTO { 11 | private List breadCrums = new ArrayList<>(); 12 | 13 | public OrfOnBreadCrumsUrlDTO(String breadCrum, String aUrl) { 14 | super(aUrl); 15 | setBreadCrums(List.of(breadCrum)); 16 | } 17 | public OrfOnBreadCrumsUrlDTO(List breadCrums, String aUrl) { 18 | super(aUrl); 19 | setBreadCrums(breadCrums); 20 | } 21 | 22 | public List getBreadCrums() { 23 | return breadCrums; 24 | } 25 | 26 | public void setBreadCrums(List breadCrums) { 27 | this.breadCrums = breadCrums; 28 | } 29 | 30 | public void setBreadCrumsPath(List breadCrums) { 31 | List fullPath = new ArrayList<>(); 32 | fullPath.addAll(breadCrums); 33 | fullPath.addAll(getBreadCrums()); 34 | setBreadCrums(fullPath); 35 | } 36 | 37 | @Override 38 | public boolean equals(final Object obj) { 39 | if (obj == null || getClass() != obj.getClass()) { 40 | return false; 41 | } 42 | 43 | if (super.equals(obj)) { 44 | return breadCrums.containsAll(((OrfOnBreadCrumsUrlDTO)obj).breadCrums); 45 | } 46 | 47 | return false; 48 | } 49 | 50 | @Override 51 | public int hashCode() { 52 | return Objects.hash(super.hashCode(), this.breadCrums); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/OrfOnConstants.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon; 2 | 3 | public final class OrfOnConstants { 4 | 5 | public static final String FILTER_JUGENDSCHUTZ = ".*/Jugendschutz[0-9][0-9][0-9][0-9]b[0-9][0-9][0-9][0-9]_.*"; 6 | // 7 | public static final String HOST = "https://api-tvthek.orf.at/api/v4.3"; 8 | // 9 | public static final String SCHEDULE = HOST + "/schedule"; 10 | // 11 | public static final String AZ = HOST + "/profiles/lettergroup"; 12 | public static final int PAGE_SIZE = 200; 13 | // 14 | public 
static final String HISTORY = HOST + "/history"; 15 | // 16 | public static final String EPISODE = HOST + "/episode"; 17 | // 18 | public static final String AUTH = "Basic b3JmX29uX3Y0MzpqRlJzYk5QRmlQU3h1d25MYllEZkNMVU41WU5aMjhtdA=="; 19 | // 20 | private OrfOnConstants() {} 21 | // 22 | public static String createMaxLimmitUrl(String plainUrl) { 23 | return plainUrl + "?limit=" + OrfOnConstants.PAGE_SIZE; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/json/OrfOnAZDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.json; 2 | 3 | import com.google.gson.*; 4 | import mServer.crawler.sender.base.JsonUtils; 5 | import mServer.crawler.sender.base.PagedElementListDTO; 6 | import mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 7 | import mServer.crawler.sender.orfon.OrfOnConstants; 8 | import org.apache.logging.log4j.LogManager; 9 | import org.apache.logging.log4j.Logger; 10 | 11 | import java.lang.reflect.Type; 12 | import java.util.Optional; 13 | 14 | 15 | public class OrfOnAZDeserializer implements JsonDeserializer> { 16 | private static final Logger LOG = LogManager.getLogger(OrfOnAZDeserializer.class); 17 | private static final String[] TAG_NEXT_PAGE = {"_links", "next", "href"}; 18 | private static final String[] TAG_ITEMS = {"_embedded", "items"}; 19 | private static final String TAG_ITEM_ID = "id"; 20 | private static final String[] TAG_ITEM_EPISODES = {"_links", "episodes", "href"}; 21 | 22 | @Override 23 | public PagedElementListDTO deserialize( 24 | final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context) 25 | throws JsonParseException { 26 | JsonObject jsonPage = jsonElement.getAsJsonObject(); 27 | // 28 | PagedElementListDTO page = new PagedElementListDTO<>(); 29 | page.setNextPage(JsonUtils.getElementValueAsString(jsonElement, 
TAG_NEXT_PAGE)); 30 | // 31 | final Optional items = JsonUtils.getElement(jsonPage, TAG_ITEMS); 32 | if (items.isPresent() && items.get().isJsonArray()) { 33 | for (JsonElement topic : items.get().getAsJsonArray()) { 34 | final Optional id = JsonUtils.getElementValueAsString(topic, TAG_ITEM_ID); 35 | final Optional url = JsonUtils.getElementValueAsString(topic, TAG_ITEM_EPISODES); 36 | if (id.isPresent() && url.isPresent()) { 37 | page.addElement(new OrfOnBreadCrumsUrlDTO(id.get(), OrfOnConstants.createMaxLimmitUrl(url.get()))); 38 | } else { 39 | // log once, with the Optional unwrapped so the plain id (or an empty string) is shown 40 | LOG.debug("No episodes found in item {}", id.orElse("")); 41 | } 42 | } 43 | } 44 | return page; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/json/OrfOnEpisodesDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.json; 2 | 3 | import com.google.gson.*; 4 | import mServer.crawler.sender.base.JsonUtils; 5 | import mServer.crawler.sender.base.PagedElementListDTO; 6 | import mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 7 | 8 | import java.lang.reflect.Type; 9 | import java.util.Optional; 10 | 11 | public class OrfOnEpisodesDeserializer implements JsonDeserializer> { 12 | private static final String[] TAG_NEXT_PAGE = {"_links", "next", "href"}; 13 | private static final String[] TAG_ITEMS = {"_embedded", "items"}; 14 | private static final String TAG_EPISODE_ID = "id"; 15 | private static final String[] TAG_EPISODE_LINK = { "_links", "self", "href"}; 16 | 17 | @Override 18 | public PagedElementListDTO deserialize( 19 | final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context) 20 | throws JsonParseException { 21 | JsonObject jsonPage = jsonElement.getAsJsonObject(); 22 | // 23 | PagedElementListDTO page = new PagedElementListDTO<>(); 24 |
page.setNextPage(JsonUtils.getElementValueAsString(jsonElement, TAG_NEXT_PAGE)); 25 | // 26 | final Optional items = JsonUtils.getElement(jsonPage, TAG_ITEMS); 27 | if (items.isPresent() && items.get().isJsonArray()) { 28 | for (JsonElement item : items.get().getAsJsonArray()) { 29 | Optional episodeId = JsonUtils.getElementValueAsString(item, TAG_EPISODE_ID); 30 | Optional episodeLink = JsonUtils.getElementValueAsString(item, TAG_EPISODE_LINK); 31 | episodeLink.ifPresent( link -> page.addElement(new OrfOnBreadCrumsUrlDTO(episodeId.orElse("EMPTY"), link))); 32 | } 33 | } 34 | return page; 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/json/OrfOnHistoryDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.json; 2 | 3 | import com.google.gson.*; 4 | import mServer.crawler.sender.base.JsonUtils; 5 | import mServer.crawler.sender.base.PagedElementListDTO; 6 | import mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 7 | import org.apache.logging.log4j.LogManager; 8 | import org.apache.logging.log4j.Logger; 9 | 10 | import java.lang.reflect.Type; 11 | import java.util.Optional; 12 | 13 | 14 | public class OrfOnHistoryDeserializer implements JsonDeserializer> { 15 | private static final Logger LOG = LogManager.getLogger(OrfOnHistoryDeserializer.class); 16 | private static final String[] TAG_NEXT_PAGE = {}; 17 | private static final String[] TAG_ITEM_ARRAY_TOP = {"history_highlights"}; 18 | private static final String[] TAG_ITEM_TITLE = {"title"}; 19 | private static final String[] TAG_ITEM_ARRAY_BUTTOM = {"history_items"}; 20 | private static final String[] TAG_TARGET_URL = {"_links", "children", "href"}; 21 | 22 | @Override 23 | public PagedElementListDTO deserialize( 24 | final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context) 25 | throws 
JsonParseException { 26 | // 27 | PagedElementListDTO page = new PagedElementListDTO<>(); 28 | page.setNextPage(JsonUtils.getElementValueAsString(jsonElement, TAG_NEXT_PAGE)); 29 | // 30 | final Optional itemArrayTop = JsonUtils.getElement(jsonElement, TAG_ITEM_ARRAY_TOP); 31 | if (itemArrayTop.isPresent() && itemArrayTop.get().isJsonArray()) { 32 | page.addElements(parseSection(itemArrayTop.get().getAsJsonArray()).getElements()); 33 | } 34 | // 35 | final Optional itemArrayButtom = JsonUtils.getElement(jsonElement, TAG_ITEM_ARRAY_BUTTOM); 36 | if (itemArrayButtom.isPresent() && itemArrayButtom.get().isJsonArray()) { 37 | page.addElements(parseSection(itemArrayButtom.get().getAsJsonArray()).getElements()); 38 | } 39 | // 40 | return page; 41 | } 42 | 43 | public PagedElementListDTO parseSection(JsonArray itemArray) { 44 | PagedElementListDTO items = new PagedElementListDTO<>(); 45 | for (JsonElement item : itemArray) { 46 | final Optional url = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL); 47 | final Optional title = JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE); 48 | if (url.isPresent()) { 49 | items.addElement(new OrfOnBreadCrumsUrlDTO( 50 | title.orElse("EMPTY"), 51 | url.get() 52 | )); 53 | } else { 54 | LOG.debug("missing url for {}", title); 55 | } 56 | } 57 | return items; 58 | } 59 | 60 | 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/json/OrfOnHistoryVideoItemDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.json; 2 | 3 | import com.google.gson.JsonDeserializationContext; 4 | import com.google.gson.JsonDeserializer; 5 | import com.google.gson.JsonElement; 6 | import com.google.gson.JsonParseException; 7 | import mServer.crawler.sender.base.JsonUtils; 8 | import mServer.crawler.sender.base.PagedElementListDTO; 9 | import 
mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 10 | 11 | import java.lang.reflect.Type; 12 | import java.util.Optional; 13 | 14 | public class OrfOnHistoryVideoItemDeserializer implements JsonDeserializer> { 15 | private static final String[] TAG_NEXT_PAGE = { "next" }; 16 | private static final String[] TAG_ITEM_ARRAY = { "_items" }; 17 | private static final String[] TAG_ITEM_TITLE = {"title"}; 18 | private static final String[] TAG_TARGET_URL = {"_links", "self", "href"}; 19 | private static final String[] TAG_TARGET_URL_EPISODE = {"_links", "episode", "href"}; 20 | 21 | 22 | 23 | @Override 24 | public PagedElementListDTO deserialize( 25 | final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context) 26 | throws JsonParseException { 27 | // 28 | PagedElementListDTO page = new PagedElementListDTO<>(); 29 | page.setNextPage(JsonUtils.getElementValueAsString(jsonElement, TAG_NEXT_PAGE)); 30 | // 31 | Optional itemArrayTop = JsonUtils.getElement(jsonElement, TAG_ITEM_ARRAY); 32 | if (itemArrayTop.isPresent() && itemArrayTop.get().isJsonArray()) { 33 | for (JsonElement item : itemArrayTop.get().getAsJsonArray()) { 34 | final Optional urlSelf = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL); 35 | final Optional urlEpisode = JsonUtils.getElementValueAsString(item, TAG_TARGET_URL_EPISODE); 36 | final Optional title = JsonUtils.getElementValueAsString(item, TAG_ITEM_TITLE); 37 | // self should be an episode but in some cases a segment - only in this cases we have an additional episode element 38 | if (urlSelf.isPresent() && !urlSelf.get().contains("/segment/")) { 39 | page.addElement(new OrfOnBreadCrumsUrlDTO( 40 | title.orElse("MISSING TITLE"), 41 | urlSelf.get() 42 | )); 43 | } else if (urlEpisode.isPresent()) { 44 | page.addElement(new OrfOnBreadCrumsUrlDTO( 45 | title.orElse("MISSING TITLE"), 46 | urlEpisode.get() 47 | )); 48 | } 49 | } 50 | } 51 | // 52 | return page; 53 | } 54 | 55 | 56 | 57 | 58 | } 59 | 
-------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/json/OrfOnScheduleDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.json; 2 | 3 | import com.google.gson.*; 4 | import mServer.crawler.sender.base.JsonUtils; 5 | import mServer.crawler.sender.base.PagedElementListDTO; 6 | import mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 7 | import mServer.crawler.sender.orfon.OrfOnConstants; 8 | 9 | import java.lang.reflect.Type; 10 | import java.util.Optional; 11 | 12 | 13 | public class OrfOnScheduleDeserializer implements JsonDeserializer> { 14 | private static final String TAG_FILM_ID = "id"; 15 | 16 | @Override 17 | public PagedElementListDTO deserialize( 18 | final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context) 19 | throws JsonParseException { 20 | PagedElementListDTO collectIds = new PagedElementListDTO<>(); 21 | final JsonArray elements = jsonElement.getAsJsonArray(); 22 | for (JsonElement element : elements) { 23 | final Optional id = JsonUtils.getElementValueAsString(element, TAG_FILM_ID); 24 | if (id.isPresent()) { 25 | final String url = OrfOnConstants.EPISODE + "/" + id.get(); 26 | collectIds.addElement(new OrfOnBreadCrumsUrlDTO(id.get(), url)); 27 | } 28 | } 29 | return collectIds; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/task/OrfOnAZTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.task; 2 | 3 | 4 | import com.google.gson.JsonDeserializer; 5 | import com.google.gson.reflect.TypeToken; 6 | import mServer.crawler.sender.MediathekReader; 7 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 8 | import mServer.crawler.sender.base.PagedElementListDTO; 9 | import 
mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 10 | import mServer.crawler.sender.orfon.json.OrfOnAZDeserializer; 11 | 12 | import java.lang.reflect.Type; 13 | import java.util.concurrent.ConcurrentLinkedQueue; 14 | 15 | // extends AbstractRestTask 16 | // return T Class from this task, desirialisation of class R , D , Reasearch in this url 17 | public class OrfOnAZTask extends OrfOnPagedTask { 18 | private static final long serialVersionUID = 1L; 19 | 20 | public OrfOnAZTask(MediathekReader crawler, ConcurrentLinkedQueue urlToCrawlDTOs) { 21 | super(crawler, urlToCrawlDTOs); 22 | } 23 | 24 | @Override 25 | protected JsonDeserializer> getParser(OrfOnBreadCrumsUrlDTO aDTO) { 26 | return new OrfOnAZDeserializer(); 27 | } 28 | 29 | @Override 30 | protected Type getType() { 31 | return new TypeToken>() {}.getType(); 32 | } 33 | 34 | @Override 35 | protected AbstractRecursivConverterTask createNewOwnInstance(ConcurrentLinkedQueue aElementsToProcess) { 36 | return new OrfOnAZTask(crawler, aElementsToProcess); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/task/OrfOnEpisodesTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.task; 2 | 3 | import com.google.gson.JsonDeserializer; 4 | import com.google.gson.reflect.TypeToken; 5 | import mServer.crawler.sender.MediathekReader; 6 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 7 | import mServer.crawler.sender.base.PagedElementListDTO; 8 | import mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 9 | import mServer.crawler.sender.orfon.json.OrfOnEpisodesDeserializer; 10 | 11 | import java.lang.reflect.Type; 12 | import java.util.concurrent.ConcurrentLinkedQueue; 13 | 14 | // extends AbstractRestTask 15 | // return T Class from this task, desirialisation of class R , D , Reasearch in this url 16 | public class 
OrfOnEpisodesTask extends OrfOnPagedTask { 17 | private static final long serialVersionUID = 1L; 18 | 19 | public OrfOnEpisodesTask(MediathekReader crawler, ConcurrentLinkedQueue urlToCrawlDTOs) { 20 | super(crawler, urlToCrawlDTOs); 21 | } 22 | 23 | @Override 24 | public JsonDeserializer> getParser(OrfOnBreadCrumsUrlDTO aDTO) { 25 | return new OrfOnEpisodesDeserializer(); 26 | } 27 | 28 | @Override 29 | public Type getType() { 30 | return new TypeToken>() {}.getType(); 31 | } 32 | 33 | @Override 34 | public AbstractRecursivConverterTask createNewOwnInstance(ConcurrentLinkedQueue aElementsToProcess) { 35 | return new OrfOnEpisodesTask(crawler, aElementsToProcess); 36 | } 37 | 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/task/OrfOnHistoryChildrenTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.task; 2 | 3 | import com.google.gson.JsonDeserializer; 4 | import com.google.gson.reflect.TypeToken; 5 | import mServer.crawler.sender.MediathekReader; 6 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 7 | import mServer.crawler.sender.base.PagedElementListDTO; 8 | import mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 9 | import mServer.crawler.sender.orfon.json.OrfOnHistoryChildrenDeserializer; 10 | 11 | import java.lang.reflect.Type; 12 | import java.util.Set; 13 | import java.util.concurrent.ConcurrentLinkedQueue; 14 | 15 | // extends AbstractRestTask 16 | // return T Class from this task, desirialisation of class R , D , Reasearch in this url 17 | public class OrfOnHistoryChildrenTask extends OrfOnPagedTask { 18 | private static final long serialVersionUID = 1L; 19 | 20 | public OrfOnHistoryChildrenTask(MediathekReader crawler, ConcurrentLinkedQueue urlToCrawlDTOs) { 21 | super(crawler, urlToCrawlDTOs); 22 | } 23 | 24 | @Override 25 | protected JsonDeserializer> 
getParser(OrfOnBreadCrumsUrlDTO aDTO) { 26 | return new OrfOnHistoryChildrenDeserializer(); 27 | } 28 | 29 | @Override 30 | protected Type getType() { 31 | return new TypeToken>() {}.getType(); 32 | } 33 | 34 | @Override 35 | protected void postProcessingElements(Set elements, OrfOnBreadCrumsUrlDTO originalDTO) { 36 | for (OrfOnBreadCrumsUrlDTO element : elements) { 37 | if (element.getUrl().contains("/children")) { 38 | final ConcurrentLinkedQueue moreContentOnNewPage = new ConcurrentLinkedQueue<>(); 39 | moreContentOnNewPage.add(element); 40 | AbstractRecursivConverterTask resolveChildren = createNewOwnInstance(moreContentOnNewPage); 41 | resolveChildren.fork(); 42 | for(OrfOnBreadCrumsUrlDTO moreElements : resolveChildren.join()) { 43 | moreElements.setBreadCrumsPath(originalDTO.getBreadCrums()); 44 | taskResults.add(moreElements); 45 | } 46 | } else { 47 | element.setBreadCrumsPath(originalDTO.getBreadCrums()); 48 | taskResults.add(element); 49 | } 50 | } 51 | } 52 | 53 | @Override 54 | protected AbstractRecursivConverterTask createNewOwnInstance(ConcurrentLinkedQueue aElementsToProcess) { 55 | return new OrfOnHistoryChildrenTask(crawler, aElementsToProcess); 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/task/OrfOnHistoryTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.task; 2 | 3 | import com.google.gson.JsonDeserializer; 4 | import com.google.gson.reflect.TypeToken; 5 | import mServer.crawler.sender.MediathekReader; 6 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 7 | import mServer.crawler.sender.base.PagedElementListDTO; 8 | import mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 9 | import mServer.crawler.sender.orfon.json.OrfOnHistoryDeserializer; 10 | 11 | import java.lang.reflect.Type; 12 | import java.util.concurrent.ConcurrentLinkedQueue; 13 | 
14 | // extends AbstractRestTask 15 | // return T Class from this task, desirialisation of class R , D , Reasearch in this url 16 | public class OrfOnHistoryTask extends OrfOnPagedTask { 17 | private static final long serialVersionUID = 1L; 18 | 19 | 20 | public OrfOnHistoryTask(MediathekReader crawler, ConcurrentLinkedQueue urlToCrawlDTOs) { 21 | super(crawler, urlToCrawlDTOs); 22 | } 23 | 24 | @Override 25 | protected JsonDeserializer> getParser(OrfOnBreadCrumsUrlDTO aDTO) { 26 | return new OrfOnHistoryDeserializer(); 27 | } 28 | 29 | @Override 30 | protected Type getType() { 31 | return new TypeToken>() {}.getType(); 32 | } 33 | 34 | @Override 35 | protected AbstractRecursivConverterTask createNewOwnInstance(ConcurrentLinkedQueue aElementsToProcess) { 36 | return new OrfOnHistoryTask(crawler, aElementsToProcess); 37 | } 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/task/OrfOnHistoryVideoItemTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.task; 2 | 3 | import com.google.gson.JsonDeserializer; 4 | import com.google.gson.reflect.TypeToken; 5 | import mServer.crawler.sender.MediathekReader; 6 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 7 | import mServer.crawler.sender.base.PagedElementListDTO; 8 | import mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 9 | import mServer.crawler.sender.orfon.json.OrfOnHistoryVideoItemDeserializer; 10 | 11 | import java.lang.reflect.Type; 12 | import java.util.concurrent.ConcurrentLinkedQueue; 13 | 14 | // extends AbstractRestTask 15 | // return T Class from this task, desirialisation of class R , D , Reasearch in this url 16 | public class OrfOnHistoryVideoItemTask extends OrfOnPagedTask { 17 | private static final long serialVersionUID = 1L; 18 | 19 | public OrfOnHistoryVideoItemTask(MediathekReader crawler, 
ConcurrentLinkedQueue urlToCrawlDTOs) { 20 | super(crawler, urlToCrawlDTOs); 21 | } 22 | 23 | @Override 24 | protected JsonDeserializer> getParser(OrfOnBreadCrumsUrlDTO aDTO) { 25 | return new OrfOnHistoryVideoItemDeserializer(); 26 | } 27 | 28 | @Override 29 | protected Type getType() { 30 | return new TypeToken>() {}.getType(); 31 | } 32 | 33 | @Override 34 | protected AbstractRecursivConverterTask createNewOwnInstance(ConcurrentLinkedQueue aElementsToProcess) { 35 | return new OrfOnHistoryVideoItemTask(crawler, aElementsToProcess); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/orfon/task/OrfOnScheduleTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.orfon.task; 2 | 3 | import com.google.gson.JsonDeserializer; 4 | import com.google.gson.reflect.TypeToken; 5 | import mServer.crawler.sender.MediathekReader; 6 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 7 | import mServer.crawler.sender.base.PagedElementListDTO; 8 | import mServer.crawler.sender.base.TopicUrlDTO; 9 | import mServer.crawler.sender.orfon.OrfOnBreadCrumsUrlDTO; 10 | import mServer.crawler.sender.orfon.json.OrfOnScheduleDeserializer; 11 | 12 | import java.lang.reflect.Type; 13 | import java.util.Set; 14 | import java.util.concurrent.ConcurrentLinkedQueue; 15 | 16 | // extends AbstractRestTask 17 | // return T Class from this task, desirialisation of class R , D , Reasearch in this url 18 | public class OrfOnScheduleTask extends OrfOnPagedTask { 19 | private static final long serialVersionUID = -2556623295745879044L; 20 | 21 | public OrfOnScheduleTask(MediathekReader crawler, ConcurrentLinkedQueue urlToCrawlDTOs) { 22 | super(crawler, urlToCrawlDTOs); 23 | } 24 | 25 | @Override 26 | protected JsonDeserializer> getParser(OrfOnBreadCrumsUrlDTO aDTO) { 27 | return new OrfOnScheduleDeserializer(); 28 | } 29 | 30 | 
@Override 31 | protected Type getType() { 32 | return new TypeToken>() {}.getType(); 33 | } 34 | 35 | @Override 36 | protected AbstractRecursivConverterTask createNewOwnInstance( 37 | ConcurrentLinkedQueue aElementsToProcess) { 38 | return new OrfOnScheduleTask(crawler, aElementsToProcess); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/phoenix/PhoenixConstants.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.phoenix; 2 | 3 | public final class PhoenixConstants { 4 | private PhoenixConstants() {} 5 | 6 | public static final String URL_BASE = "https://www.phoenix.de"; 7 | 8 | public static final String URL_OVERVIEW_JSON = "/response/template/sendungseite_overview_json"; 9 | 10 | public static final String URL_FILM_DETAIL_JSON = "/response/id/"; 11 | public static final String URL_VIDEO_DETAILS = "%s/php/mediaplayer/data/beitrags_details.php?id=%s"; 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/phoenix/PhoenixCrawler.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.phoenix; 2 | 3 | import de.mediathekview.mlib.Const; 4 | import de.mediathekview.mlib.daten.DatenFilm; 5 | import de.mediathekview.mlib.tool.Log; 6 | import mServer.crawler.FilmeSuchen; 7 | import mServer.crawler.sender.MediathekCrawler; 8 | import mServer.crawler.sender.base.CrawlerUrlDTO; 9 | import mServer.crawler.sender.phoenix.tasks.PhoenixFilmDetailTask; 10 | import mServer.crawler.sender.phoenix.tasks.PhoenixOverviewTask; 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | import java.util.Collection; 15 | import java.util.Optional; 16 | import java.util.Set; 17 | import java.util.concurrent.ConcurrentLinkedQueue; 18 | import 
java.util.concurrent.ExecutionException; 19 | import java.util.concurrent.RecursiveTask; 20 | 21 | public class PhoenixCrawler extends MediathekCrawler { 22 | 23 | private static final Logger LOG = LogManager.getLogger(PhoenixCrawler.class); 24 | 25 | public static final String SENDERNAME = Const.PHOENIX; 26 | 27 | public PhoenixCrawler(FilmeSuchen ssearch, int startPrio) { 28 | super(ssearch, SENDERNAME, 0, 1, startPrio); 29 | } 30 | // Loads the show URLs and returns the film detail task for them; returns null when loading fails. 31 | @Override 32 | protected RecursiveTask> createCrawlerTask() { 33 | final ConcurrentLinkedQueue shows = new ConcurrentLinkedQueue<>(); 34 | 35 | try { 36 | shows.addAll(getShows()); 37 | 38 | Log.sysLog("PHÖNIX Anzahl: " + shows.size()); 39 | 40 | meldungAddMax(shows.size()); 41 | 42 | return new PhoenixFilmDetailTask(this, shows, Optional.empty(), PhoenixConstants.URL_BASE); 43 | } catch (ExecutionException | InterruptedException ex) { 44 | LOG.fatal("Exception in Phönix crawler.", ex); 45 | // restore the interrupt flag so code further up the stack can still react to the interruption 46 | if (ex instanceof InterruptedException) { Thread.currentThread().interrupt(); } 47 | } 48 | return null; 49 | } 50 | private Collection getShows() throws ExecutionException, InterruptedException { 51 | // load sendungen page 52 | CrawlerUrlDTO url = new CrawlerUrlDTO(PhoenixConstants.URL_BASE + PhoenixConstants.URL_OVERVIEW_JSON); 53 | 54 | final ConcurrentLinkedQueue queue = new ConcurrentLinkedQueue<>(); 55 | queue.add(url); 56 | 57 | final Set overviewUrls = loadOverviewPages(queue); 58 | 59 | // load sendung overview pages 60 | final ConcurrentLinkedQueue queue1 = new ConcurrentLinkedQueue<>(); 61 | queue1.addAll(overviewUrls); 62 | final Set filmUrls = loadOverviewPages(queue1); 63 | 64 | return filmUrls; 65 | } 66 | 67 | private Set loadOverviewPages(final ConcurrentLinkedQueue aQueue) 68 | throws ExecutionException, InterruptedException { 69 | PhoenixOverviewTask overviewTask = new PhoenixOverviewTask(this, aQueue, Optional.empty(), PhoenixConstants.URL_BASE); 70 | final Set urls = forkJoinPool.submit(overviewTask).get(); 71 | 72 | return urls; 73 | } 74 | } 75 |
-------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/phoenix/parser/PhoenixFilmDetailDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.phoenix.parser; 2 | 3 | import java.util.Optional; 4 | 5 | public class PhoenixFilmDetailDto { 6 | 7 | private String baseName; 8 | private String topic; 9 | private String title; 10 | private String description; 11 | private String website; 12 | 13 | public String getBaseName() { 14 | return baseName; 15 | } 16 | 17 | public void setBaseName(String baseName) { 18 | this.baseName = baseName; 19 | } 20 | 21 | public String getTopic() { 22 | return topic; 23 | } 24 | 25 | public void setTopic(String topic) { 26 | this.topic = topic; 27 | } 28 | 29 | public String getTitle() { 30 | return title; 31 | } 32 | 33 | public void setTitle(String title) { 34 | this.title = title; 35 | } 36 | 37 | public String getDescription() { 38 | return description; 39 | } 40 | 41 | public void setDescription(String description) { 42 | this.description = description; 43 | } 44 | 45 | public Optional getWebsite() { 46 | if (website == null || website.isEmpty()) { 47 | return Optional.empty(); 48 | } 49 | 50 | return Optional.of(website); 51 | } 52 | 53 | public void setWebsite(String website) { 54 | this.website = website; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/sr/SrConstants.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.sr; 2 | 3 | public final class SrConstants { 4 | 5 | private SrConstants() {} 6 | 7 | public static final String URL_BASE = "https://www.sr-mediathek.de/"; 8 | 9 | /** 10 | * URL für Übersichtsseite nach Themen 11 | * Am Ende muss noch eine Buchstabenkombination ergänzt werden, z.B. 
"abc", "ziffern" 12 | */ 13 | public static final String URL_OVERVIEW_PAGE = "https://www.sr-mediathek.de/index.php?seite=5&a_z="; 14 | 15 | /** 16 | * URL für Archivseite einer Sendung 17 | * Parameter: Kürzel der Sendung, Seitennummer 18 | */ 19 | public static final String URL_SHOW_ARCHIVE_PAGE = "http://www.sr-mediathek.de/index.php?seite=10&sen=%s&s=%s"; 20 | 21 | /** 22 | * URL für die Detailseite eines Films 23 | * Parameter: Id 24 | */ 25 | public static final String URL_FILM_DETAIL = "https://www.sr-mediathek.de/index.php?seite=7&id=%s"; 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/sr/SrCrawler.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.sr; 2 | 3 | import de.mediathekview.mlib.Const; 4 | import de.mediathekview.mlib.daten.DatenFilm; 5 | import de.mediathekview.mlib.tool.Log; 6 | import mServer.crawler.sender.sr.tasks.SrFilmDetailTask; 7 | import mServer.crawler.sender.sr.tasks.SrTopicArchivePageTask; 8 | import mServer.crawler.sender.sr.tasks.SrTopicsOverviewPageTask; 9 | 10 | import java.util.Set; 11 | import java.util.concurrent.ConcurrentLinkedQueue; 12 | import java.util.concurrent.ExecutionException; 13 | import java.util.concurrent.RecursiveTask; 14 | import mServer.crawler.FilmeSuchen; 15 | import mServer.crawler.sender.MediathekCrawler; 16 | 17 | public class SrCrawler extends MediathekCrawler { 18 | 19 | public static final String SENDERNAME = Const.SR; 20 | 21 | public SrCrawler(FilmeSuchen ssearch, int startPrio) { 22 | super(ssearch, SENDERNAME, 0, 1, startPrio); 23 | } 24 | 25 | @Override 26 | protected RecursiveTask> createCrawlerTask() { 27 | final ConcurrentLinkedQueue filmDtos = new ConcurrentLinkedQueue<>(); 28 | try { 29 | final SrTopicsOverviewPageTask overviewTask = new SrTopicsOverviewPageTask(); 30 | final ConcurrentLinkedQueue shows = 
forkJoinPool.submit(overviewTask).get(); 31 | 32 | final SrTopicArchivePageTask archiveTask = new SrTopicArchivePageTask(this, shows); 33 | filmDtos.addAll(forkJoinPool.submit(archiveTask).get()); 34 | 35 | Log.sysLog("SR Anzahl: " + filmDtos.size()); 36 | 37 | meldungAddMax(filmDtos.size()); 38 | } catch (InterruptedException | ExecutionException exception) { 39 | Log.errorLog(56146546, exception); 40 | if (exception instanceof InterruptedException) { Thread.currentThread().interrupt(); } // restore the interrupt flag for callers 41 | } 42 | return new SrFilmDetailTask(this, filmDtos); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/sr/SrTopicUrlDTO.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.sr; 2 | 3 | import mServer.crawler.sender.base.CrawlerUrlDTO; 4 | 5 | public class SrTopicUrlDTO extends CrawlerUrlDTO { 6 | 7 | private final String theme; 8 | 9 | public SrTopicUrlDTO(String aTheme, String aUrl) { 10 | super(aUrl); 11 | theme = aTheme; 12 | } 13 | 14 | public String getTheme() { 15 | return theme; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/sr/tasks/SrRateLimitedDocumentTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.sr.tasks; 2 | 3 | import com.google.common.util.concurrent.RateLimiter; 4 | import java.util.concurrent.ConcurrentLinkedQueue; 5 | import mServer.crawler.sender.MediathekReader; 6 | import mServer.crawler.sender.base.AbstractDocumentTask; 7 | import mServer.crawler.sender.base.CrawlerUrlDTO; 8 | import mServer.tool.MserverDaten; 9 | 10 | public abstract class SrRateLimitedDocumentTask extends AbstractDocumentTask { 11 | 12 | private static final long serialVersionUID = -4077182368484515410L; 13 | 14 | private static final RateLimiter LIMITER = RateLimiter.create(MserverDaten.getSrRateLimit()); 15 | 16 | public
SrRateLimitedDocumentTask(MediathekReader aCrawler, ConcurrentLinkedQueue aUrlToCrawlDTOs) { 17 | super(aCrawler, aUrlToCrawlDTOs); 18 | } 19 | 20 | @Override 21 | protected void processElement(D aUrlDTO) { 22 | LIMITER.acquire(); 23 | super.processElement(aUrlDTO); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/srf/SrfConstants.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.srf; 2 | 3 | public final class SrfConstants { 4 | 5 | private SrfConstants() { 6 | } 7 | 8 | public static final String BASE_URL = "https://www.srf.ch"; 9 | 10 | /** 11 | * URL für Übersichtsseite der Mediathek 12 | */ 13 | public static final String OVERVIEW_PAGE_URL = BASE_URL + "/play/v3/api/srf/production/shows?onlyActiveShows=false"; 14 | /** 15 | * URL für Übersichtsseite einer Sendung Parameter: Id 16 | */ 17 | public static final String SHOW_OVERVIEW_PAGE_URL = "%s/play/v3/api/srf/production/videos-by-show-id?showId=%s"; 18 | /** 19 | * URL für folgende Übersichtsseiten einer Sendung Parameter: Id, next-"Key" 20 | */ 21 | public static final String SHOW_OVERVIEW_NEXT_PAGE_URL = SHOW_OVERVIEW_PAGE_URL + "&next=%s"; 22 | /** 23 | * URL für Detailsinformation einer Folge Parameter: Id 24 | */ 25 | public static final String SHOW_DETAIL_PAGE_URL = "https://il.srgssr.ch/integrationlayer/2.0/mediaComposition/byUrn/urn:srf:video:%s.json"; 26 | /** 27 | * URL für Webseite einer Folge Parameter: Thema, Titel, Id 28 | */ 29 | public static final String WEBSITE_URL = "https://www.srf.ch/play/tv/%s/video/%s?id=%s"; 30 | /** 31 | * Id der Sendung SportClip, die nicht unter Sendungen A-Z gelistet ist 32 | */ 33 | public static final String ID_SHOW_SPORT_CLIP = "5327eac1-e5a1-40aa-9f71-707e48258097"; 34 | } 35 | -------------------------------------------------------------------------------- 
/src/main/java/mServer/crawler/sender/srf/SrfCrawler.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.srf; 2 | 3 | import de.mediathekview.mlib.Const; 4 | import de.mediathekview.mlib.daten.DatenFilm; 5 | import de.mediathekview.mlib.tool.Log; 6 | import mServer.crawler.sender.srf.tasks.SrfFilmDetailTask; 7 | import java.util.Set; 8 | import java.util.concurrent.ConcurrentLinkedQueue; 9 | import java.util.concurrent.ExecutionException; 10 | import java.util.concurrent.RecursiveTask; 11 | import mServer.crawler.CrawlerTool; 12 | import mServer.crawler.FilmeSuchen; 13 | import mServer.crawler.sender.MediathekCrawler; 14 | import mServer.crawler.sender.base.CrawlerUrlDTO; 15 | import mServer.crawler.sender.base.TopicUrlDTO; 16 | import mServer.crawler.sender.srf.tasks.SrfTopicOverviewTask; 17 | import mServer.crawler.sender.srf.tasks.SrfTopicsOverviewTask; 18 | import org.apache.logging.log4j.LogManager; 19 | import org.apache.logging.log4j.Logger; 20 | 21 | public class SrfCrawler extends MediathekCrawler { 22 | 23 | private static final Logger LOG = LogManager.getLogger(SrfCrawler.class); 24 | 25 | public SrfCrawler(FilmeSuchen ssearch, int startPrio) { 26 | super(ssearch, Const.SRF, 0, 1, startPrio); 27 | } 28 | 29 | @Override 30 | protected RecursiveTask> createCrawlerTask() { 31 | try { 32 | final ConcurrentLinkedQueue topicsUrls = new ConcurrentLinkedQueue<>(); 33 | topicsUrls.add(new CrawlerUrlDTO(SrfConstants.OVERVIEW_PAGE_URL)); 34 | final SrfTopicsOverviewTask overviewTask = new SrfTopicsOverviewTask(this, topicsUrls); 35 | final ConcurrentLinkedQueue topicUrls = new ConcurrentLinkedQueue<>(forkJoinPool.submit(overviewTask).get()); 36 | 37 | final SrfTopicOverviewTask task = new SrfTopicOverviewTask(this, topicUrls, SrfConstants.BASE_URL, getMaxSubPages()); 38 | forkJoinPool.execute(task); 39 | 40 | final ConcurrentLinkedQueue dtos 41 | = new ConcurrentLinkedQueue<>(); 42 | 
dtos.addAll(task.join()); 43 | 44 | Log.sysLog("SRF Anzahl: " + dtos.size()); 45 | meldungAddMax(dtos.size()); 46 | 47 | return new SrfFilmDetailTask(this, dtos); 48 | 49 | } catch (InterruptedException | ExecutionException ex) { 50 | LOG.fatal("Exception in SRF crawler.", ex); 51 | Log.errorLog(745615611, ex); 52 | } 53 | return null; 54 | } 55 | 56 | private static int getMaxSubPages() { 57 | if (CrawlerTool.loadLongMax()) { 58 | return 5; 59 | } 60 | 61 | return 1; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/srf/parser/SrfTopicDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.srf.parser; 2 | 3 | import com.google.gson.JsonArray; 4 | import com.google.gson.JsonDeserializationContext; 5 | import com.google.gson.JsonDeserializer; 6 | import com.google.gson.JsonElement; 7 | import com.google.gson.JsonObject; 8 | import java.lang.reflect.Type; 9 | import java.util.Optional; 10 | import mServer.crawler.sender.base.CrawlerUrlDTO; 11 | import mServer.crawler.sender.base.JsonUtils; 12 | import mServer.crawler.sender.base.PagedElementListDTO; 13 | import mServer.crawler.sender.srf.SrfConstants; 14 | 15 | public class SrfTopicDeserializer implements JsonDeserializer> { 16 | 17 | private static final String ELEMENT_DATA = "data"; 18 | private static final String ATTRIBUTE_ID = "id"; 19 | private static final String ATTRIBUTE_NEXT = "next"; 20 | 21 | @Override 22 | public PagedElementListDTO deserialize( 23 | JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) { 24 | final PagedElementListDTO results = new PagedElementListDTO<>(); 25 | 26 | if (!jsonElement.getAsJsonObject().has(ELEMENT_DATA) 27 | || !jsonElement.getAsJsonObject().get(ELEMENT_DATA).getAsJsonObject().has(ELEMENT_DATA) 28 | || !jsonElement 29 | .getAsJsonObject() 30 | .get(ELEMENT_DATA) 31 | 
.getAsJsonObject() 32 | .get(ELEMENT_DATA) 33 | .isJsonArray()) { 34 | return results; 35 | } 36 | 37 | final JsonObject dataObject = jsonElement.getAsJsonObject().get(ELEMENT_DATA).getAsJsonObject(); 38 | 39 | results.setNextPage(parseNextPage(dataObject)); 40 | 41 | final JsonArray data = dataObject.getAsJsonArray(ELEMENT_DATA); 42 | data.forEach( 43 | entry -> { 44 | final Optional id 45 | = JsonUtils.getAttributeAsString(entry.getAsJsonObject(), ATTRIBUTE_ID); 46 | 47 | id.ifPresent( 48 | s 49 | -> results.addElement( 50 | new CrawlerUrlDTO(String.format(SrfConstants.SHOW_DETAIL_PAGE_URL, s)))); 51 | }); 52 | 53 | return results; 54 | } 55 | 56 | private Optional parseNextPage(final JsonObject dataObject) { 57 | Optional next = JsonUtils.getAttributeAsString(dataObject, ATTRIBUTE_NEXT); 58 | // ignore empty string value of next 59 | if (next.isPresent() && !next.get().isEmpty()) { 60 | return next; 61 | } 62 | 63 | return Optional.empty(); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/srf/parser/SrfTopicsDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.srf.parser; 2 | 3 | import com.google.gson.JsonArray; 4 | import com.google.gson.JsonDeserializationContext; 5 | import com.google.gson.JsonDeserializer; 6 | import com.google.gson.JsonElement; 7 | import java.lang.reflect.Type; 8 | import java.util.HashSet; 9 | import java.util.Optional; 10 | import java.util.Set; 11 | import mServer.crawler.sender.base.JsonUtils; 12 | import mServer.crawler.sender.base.TopicUrlDTO; 13 | import mServer.crawler.sender.srf.SrfConstants; 14 | 15 | public class SrfTopicsDeserializer implements JsonDeserializer> { 16 | 17 | private static final String ELEMENT_DATA = "data"; 18 | private static final String ATTRIBUTE_ID = "id"; 19 | 20 | @Override 21 | public Set deserialize( 22 | JsonElement jsonElement, Type 
type, JsonDeserializationContext jsonDeserializationContext) { 23 | final Set results = new HashSet<>(); 24 | 25 | if (!jsonElement.getAsJsonObject().has(ELEMENT_DATA) 26 | || !jsonElement.getAsJsonObject().get(ELEMENT_DATA).isJsonArray()) { 27 | return results; 28 | } 29 | 30 | final JsonArray data = jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_DATA); 31 | 32 | data.forEach( 33 | entry -> { 34 | final Optional id 35 | = JsonUtils.getAttributeAsString(entry.getAsJsonObject(), ATTRIBUTE_ID); 36 | 37 | id.ifPresent( 38 | s 39 | -> results.add( 40 | new TopicUrlDTO(s, String.format(SrfConstants.SHOW_OVERVIEW_PAGE_URL, SrfConstants.BASE_URL, s)))); 41 | }); 42 | 43 | return results; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/srf/tasks/SrfTopicsOverviewTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.srf.tasks; 2 | 3 | import com.google.gson.reflect.TypeToken; 4 | import java.lang.reflect.Type; 5 | import java.util.HashSet; 6 | import java.util.Set; 7 | import java.util.concurrent.ConcurrentLinkedQueue; 8 | import jakarta.ws.rs.client.WebTarget; 9 | import mServer.crawler.sender.MediathekReader; 10 | import mServer.crawler.sender.ard.tasks.ArdTaskBase; 11 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 12 | import mServer.crawler.sender.base.CrawlerUrlDTO; 13 | import mServer.crawler.sender.base.TopicUrlDTO; 14 | import mServer.crawler.sender.srf.SrfConstants; 15 | import mServer.crawler.sender.srf.parser.SrfTopicsDeserializer; 16 | 17 | public class SrfTopicsOverviewTask extends ArdTaskBase { 18 | 19 | private static final Type SET_CRAWLER_URL_TYPE_TOKEN 20 | = new TypeToken>() { 21 | }.getType(); 22 | 23 | public SrfTopicsOverviewTask( 24 | MediathekReader aCrawler, ConcurrentLinkedQueue aURLsToCrawl) { 25 | super(aCrawler, aURLsToCrawl); 26 | 27 | 
registerJsonDeserializer(SET_CRAWLER_URL_TYPE_TOKEN, new SrfTopicsDeserializer()); 28 | } 29 | 30 | @Override 31 | protected AbstractRecursivConverterTask createNewOwnInstance( 32 | ConcurrentLinkedQueue aElementsToProcess) { 33 | return new SrfTopicsOverviewTask(crawler, aElementsToProcess); 34 | } 35 | 36 | @Override 37 | protected void processRestTarget(CrawlerUrlDTO aDTO, WebTarget aTarget) { 38 | Set results = deserialize(aTarget, SET_CRAWLER_URL_TYPE_TOKEN); 39 | taskResults.addAll(results); 40 | taskResults.addAll(addSpecialShows()); 41 | } 42 | 43 | private Set addSpecialShows() { 44 | Set shows = new HashSet<>(); 45 | shows.add( 46 | new TopicUrlDTO( 47 | SrfConstants.ID_SHOW_SPORT_CLIP, 48 | String.format( 49 | SrfConstants.SHOW_OVERVIEW_PAGE_URL, 50 | SrfConstants.BASE_URL, 51 | SrfConstants.ID_SHOW_SPORT_CLIP))); 52 | 53 | return shows; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/DownloadDtoFilmConverter.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf; 2 | 3 | import mServer.crawler.sender.base.Qualities; 4 | 5 | import java.util.Map; 6 | import java.util.Optional; 7 | 8 | public class DownloadDtoFilmConverter { 9 | 10 | private DownloadDtoFilmConverter() { 11 | } 12 | 13 | public static void getOptimizedUrls( 14 | final Map downloadUrls, 15 | final Optional aUrlOptimizer) { 16 | 17 | for (final Map.Entry qualitiesEntry : downloadUrls.entrySet()) { 18 | String url = qualitiesEntry.getValue(); 19 | 20 | if (qualitiesEntry.getKey() == Qualities.NORMAL && aUrlOptimizer.isPresent()) { 21 | url = aUrlOptimizer.get().getOptimizedUrlNormal(url); 22 | qualitiesEntry.setValue(url); 23 | } 24 | if (qualitiesEntry.getKey() == Qualities.HD && aUrlOptimizer.isPresent()) { 25 | url = aUrlOptimizer.get().getOptimizedUrlHd(url); 26 | qualitiesEntry.setValue(url); 27 | } 28 | } 29 | 30 | if 
/**
 * Holds the two authentication keys used for ZDF requests: one for the search API and one for the
 * video API. Both start out with built-in default values and can be replaced at runtime.
 */
public class ZdfConfiguration {

  private Optional<String> searchAuthKey;
  private Optional<String> videoAuthKey;

  /** Creates a configuration pre-populated with the default keys. */
  public ZdfConfiguration() {
    // NOTE(review): keys are hard-coded here; presumably they are replaced via the setters
    // once current values are known - verify against callers
    searchAuthKey = Optional.of("5bb200097db507149612d7d983131d06c79706d5");
    videoAuthKey = Optional.of("20c238b5345eb428d01ae5c748c5076f033dfcc7");
  }

  /**
   * @return the key for search requests, empty when it was cleared
   */
  public Optional<String> getSearchAuthKey() {
    return searchAuthKey;
  }

  /**
   * Replaces the key for search requests.
   *
   * @param aAuthKey the new key; {@code null} clears the key instead of throwing
   */
  public void setSearchAuthKey(final String aAuthKey) {
    searchAuthKey = Optional.ofNullable(aAuthKey);
  }

  /**
   * @return the key for video requests, empty when it was cleared
   */
  public Optional<String> getVideoAuthKey() {
    return videoAuthKey;
  }

  /**
   * Replaces the key for video requests.
   *
   * @param aAuthKey the new key; {@code null} clears the key instead of throwing
   */
  public void setVideoAuthKey(final String aAuthKey) {
    videoAuthKey = Optional.ofNullable(aAuthKey);
  }
}
*/ 9 | public static final String URL_BASE = "https://www.zdf.de"; 10 | 11 | public static final String URL_HTML_DAY = URL_BASE + "/sendung-verpasst?airtimeDate=%s"; 12 | 13 | public static final String URL_TOPICS = URL_BASE + "/sendungen-a-z"; 14 | 15 | /** Base url of the ZDF api. */ 16 | public static final String URL_API_BASE = "https://api.zdf.de"; 17 | 18 | /** Url to search the films. */ 19 | public static final String URL_DAY = 20 | URL_API_BASE 21 | + "/search/documents?hasVideo=true&q=*&types=page-video&sortOrder=desc&from=%sT00:00:00.000%%2B01:00&to=%sT23:59:59.999%%2B01:00&sortBy=date&page=1"; 22 | 23 | /** Url to request film details */ 24 | public static final String URL_FILM_JSON = "%s/content/documents/%s.json"; 25 | 26 | public static final String LANGUAGE_SUFFIX_AD = "-ad"; 27 | public static final String LANGUAGE_SUFFIX_DGS = "-dgs"; 28 | 29 | /** The language key of english. */ 30 | public static final String LANGUAGE_ENGLISH = "eng"; 31 | /** The language key of french. */ 32 | public static final String LANGUAGE_FRENCH = "fra"; 33 | /** The language key of german. */ 34 | public static final String LANGUAGE_GERMAN = "deu"; 35 | /** The language key of german audio description. 
*/ 36 | public static final String LANGUAGE_GERMAN_AD = LANGUAGE_GERMAN + LANGUAGE_SUFFIX_AD; 37 | public static final String LANGUAGE_GERMAN_DGS = LANGUAGE_GERMAN + LANGUAGE_SUFFIX_DGS; 38 | 39 | private ZdfConstants() {} 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/ZdfDatenFilm.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf; 2 | 3 | import de.mediathekview.mlib.daten.DatenFilm; 4 | 5 | public class ZdfDatenFilm extends DatenFilm { 6 | 7 | public ZdfDatenFilm(String ssender, String tthema, String filmWebsite, String ttitel, String uurl, String uurlRtmp, 8 | String datum, String zeit, long dauerSekunden, String description) { 9 | super(ssender, tthema, filmWebsite, ttitel, uurl, uurlRtmp, datum, zeit, dauerSekunden, description); 10 | } 11 | 12 | @Override 13 | public String getIndex() { 14 | // zdf uses different hosts for load balancing 15 | // https://rodl..., https://nrodl... 
16 | // ignore the hosts in index to avoid duplicate entries 17 | String url = getUrl(); 18 | 19 | url = url.replaceFirst("https://nrodl", "https://rodl") 20 | .replaceFirst("http://nrodl", "http://rodl"); 21 | 22 | return arr[FILM_SENDER].toLowerCase() + arr[FILM_THEMA].toLowerCase() + url; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/ZdfFilmDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf; 2 | 3 | import mServer.crawler.sender.base.CrawlerUrlDTO; 4 | 5 | import java.time.Duration; 6 | import java.time.LocalDateTime; 7 | import java.util.Objects; 8 | import java.util.Optional; 9 | 10 | public class ZdfFilmDto extends CrawlerUrlDTO { 11 | 12 | private final Optional topic; 13 | private final String title; 14 | private final Optional description; 15 | private final Optional website; 16 | private final Optional time; 17 | private final Optional duration; 18 | private final Optional urlSignLanguage; 19 | 20 | public ZdfFilmDto(String url, Optional topic, String title, 21 | Optional description, Optional website, 22 | Optional time, Optional duration, String urlSignLanguage) { 23 | super(url); 24 | this.topic = topic; 25 | this.title = title; 26 | this.description = description; 27 | this.website = website; 28 | this.time = time; 29 | this.duration = duration; 30 | 31 | if (urlSignLanguage != null && !urlSignLanguage.isEmpty()) { 32 | this.urlSignLanguage = Optional.of(urlSignLanguage); 33 | } else { 34 | this.urlSignLanguage = Optional.empty(); 35 | } 36 | } 37 | 38 | public Optional getTopic() { 39 | return topic; 40 | } 41 | 42 | public String getTitle() { 43 | return title; 44 | } 45 | 46 | public Optional getDescription() { 47 | return description; 48 | } 49 | 50 | public Optional getWebsite() { 51 | return website; 52 | } 53 | 54 | public Optional getTime() { 55 | return time; 56 | } 57 | 
58 | public Optional getDuration() { 59 | return duration; 60 | } 61 | 62 | public Optional getUrlSignLanguage() { 63 | return urlSignLanguage; 64 | } 65 | 66 | @Override 67 | public boolean equals(Object o) { 68 | if (this == o) { 69 | return true; 70 | } 71 | if (!(o instanceof ZdfFilmDto)) { 72 | return false; 73 | } 74 | if (!super.equals(o)) { 75 | return false; 76 | } 77 | ZdfFilmDto that = (ZdfFilmDto) o; 78 | return Objects.equals(topic, that.topic) 79 | && Objects.equals(title, that.title) 80 | && Objects.equals(description, that.description) 81 | && Objects.equals(website, that.website) 82 | && Objects.equals(time, that.time) 83 | && Objects.equals(duration, that.duration) 84 | && Objects.equals(urlSignLanguage, that.urlSignLanguage); 85 | } 86 | 87 | @Override 88 | public int hashCode() { 89 | return Objects.hash(super.hashCode(), topic, title, description, website, time, duration, urlSignLanguage); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/json/DownloadDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.json; 2 | 3 | import java.time.Duration; 4 | import java.util.EnumMap; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import java.util.Optional; 8 | import java.util.Set; 9 | import mServer.crawler.sender.base.GeoLocations; 10 | import mServer.crawler.sender.base.Qualities; 11 | 12 | /** 13 | * A data transfer object containing the information for downloading a video. 
14 | */ 15 | public class DownloadDto { 16 | 17 | private Optional geoLocation; 18 | private final Map subTitleUrls; 19 | private final Map> downloadUrls; 20 | private Optional duration; 21 | 22 | public DownloadDto() { 23 | downloadUrls = new HashMap<>(); 24 | geoLocation = Optional.empty(); 25 | subTitleUrls = new HashMap<>(); 26 | } 27 | 28 | public void addUrl(final String language, final Qualities quality, final String url) { 29 | if (!downloadUrls.containsKey(language)) { 30 | downloadUrls.put(language, new EnumMap<>(Qualities.class)); 31 | } 32 | 33 | Map urlMap = downloadUrls.get(language); 34 | urlMap.put(quality, url); 35 | } 36 | 37 | public Map getDownloadUrls(final String language) { 38 | if (downloadUrls.containsKey(language)) { 39 | return downloadUrls.get(language); 40 | } 41 | 42 | return new EnumMap<>(Qualities.class); 43 | } 44 | 45 | public Optional getDuration() { return duration; } 46 | 47 | public Set getLanguages() { 48 | return downloadUrls.keySet(); 49 | } 50 | 51 | public Optional getGeoLocation() { 52 | return geoLocation; 53 | } 54 | 55 | public Optional getSubTitleUrl(String language) { 56 | if (subTitleUrls.containsKey(language)) { 57 | return Optional.of(subTitleUrls.get(language)); 58 | } 59 | return Optional.empty(); 60 | } 61 | 62 | public Optional getUrl(final String language, final Qualities resolution) { 63 | if (downloadUrls.containsKey(language)) { 64 | Map urlMap = downloadUrls.get(language); 65 | if (urlMap.containsKey(resolution)) { 66 | return Optional.of(urlMap.get(resolution)); 67 | } 68 | } 69 | return Optional.empty(); 70 | } 71 | 72 | public void setDuration(final Duration duration) { 73 | this.duration = Optional.of(duration); 74 | } 75 | 76 | public void setGeoLocation(final GeoLocations aGeoLocation) { 77 | geoLocation = Optional.of(aGeoLocation); 78 | } 79 | 80 | public void addSubTitleUrl(final String language, final String aUrl) { 81 | subTitleUrls.put(language, aUrl); 82 | } 83 | } 84 | 
-------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/json/ZdfDayPageDto.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.json; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.Optional; 6 | import mServer.crawler.sender.base.CrawlerUrlDTO; 7 | 8 | public class ZdfDayPageDto { 9 | 10 | private final Collection entries; 11 | private Optional nextPageUrl; 12 | 13 | public ZdfDayPageDto() { 14 | entries = new ArrayList<>(); 15 | nextPageUrl = Optional.empty(); 16 | } 17 | 18 | public void addEntry(CrawlerUrlDTO entry) { 19 | entries.add(entry); 20 | } 21 | 22 | public Collection getEntries() { 23 | return entries; 24 | } 25 | 26 | public Optional getNextPageUrl() { 27 | return nextPageUrl; 28 | } 29 | 30 | public void setNextPageUrl(final String aUrl) { 31 | nextPageUrl = Optional.of(aUrl); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/parser/ZdfDayPageHtmlDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.parser; 2 | 3 | import java.util.HashSet; 4 | import java.util.Optional; 5 | import java.util.Set; 6 | import mServer.crawler.sender.base.CrawlerUrlDTO; 7 | import mServer.crawler.sender.base.UrlUtils; 8 | import mServer.crawler.sender.zdf.ZdfConstants; 9 | import org.jsoup.nodes.Document; 10 | import org.jsoup.select.Elements; 11 | 12 | public class ZdfDayPageHtmlDeserializer { 13 | 14 | private static final String ATTRIBUTE_HREF = "href"; 15 | private static final String LINK_SELECTOR = "article h3 a"; 16 | private final String urlApiBase; 17 | 18 | public ZdfDayPageHtmlDeserializer(final String urlApiBase) { 19 | this.urlApiBase = urlApiBase; 20 | } 21 | 22 | public Set deserialize(final Document 
document) { 23 | final Set results = new HashSet<>(); 24 | 25 | Elements filmUrls = document.select(LINK_SELECTOR); 26 | filmUrls.forEach( 27 | filmUrlElement -> { 28 | final Optional url 29 | = buildFilmUrlJsonFromHtmlLink(filmUrlElement.attr(ATTRIBUTE_HREF)); 30 | url.ifPresent(s -> results.add(new CrawlerUrlDTO(s))); 31 | }); 32 | 33 | return results; 34 | } 35 | 36 | private Optional buildFilmUrlJsonFromHtmlLink(String attr) { 37 | return UrlUtils.getFileName(attr) 38 | .map(s -> String.format(ZdfConstants.URL_FILM_JSON, urlApiBase, s.split("\\.")[0])); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/parser/ZdfLetterListHtmlDeserializer.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.parser; 2 | 3 | import mServer.crawler.sender.base.CrawlerUrlDTO; 4 | import mServer.crawler.sender.base.UrlUtils; 5 | import mServer.crawler.sender.zdf.ZdfConstants; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.select.Elements; 8 | 9 | import java.util.HashSet; 10 | import java.util.Set; 11 | 12 | public class ZdfLetterListHtmlDeserializer { 13 | private static final String LINK_SELECTOR = "ul.letter-list li a"; 14 | private static final String ATTRIBUTE_HREF = "href"; 15 | 16 | public Set deserialize(final Document document) { 17 | final Set results = new HashSet<>(); 18 | 19 | Elements filmUrls = document.select(LINK_SELECTOR); 20 | filmUrls.forEach( 21 | filmUrlElement -> { 22 | String url = filmUrlElement.attr(ATTRIBUTE_HREF); 23 | url = UrlUtils.addDomainIfMissing(url, ZdfConstants.URL_BASE); 24 | results.add(new CrawlerUrlDTO(url)); 25 | }); 26 | 27 | return results; 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/parser/ZdfTopicsPageHtmlDeserializer.java: 
-------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.parser; 2 | 3 | import mServer.crawler.sender.base.CrawlerUrlDTO; 4 | import mServer.crawler.sender.base.UrlUtils; 5 | import mServer.crawler.sender.zdf.ZdfConstants; 6 | import org.jsoup.nodes.Document; 7 | import org.jsoup.nodes.Element; 8 | import org.jsoup.select.Elements; 9 | 10 | import java.util.HashSet; 11 | import java.util.Set; 12 | 13 | public class ZdfTopicsPageHtmlDeserializer { 14 | 15 | private static final String ARTICLE_SELECTOR = "article"; 16 | private static final String LINK_SELECTOR = "h3 a"; 17 | private static final String TEASER_SELECTOR = "dd.teaser-info span"; 18 | private static final String ATTRIBUTE_HREF = "href"; 19 | 20 | public Set deserialize(final Document document) { 21 | final Set results = new HashSet<>(); 22 | 23 | Elements filmUrls = document.select(ARTICLE_SELECTOR); 24 | filmUrls.forEach( 25 | articleElement -> { 26 | final Element filmUrlElement = articleElement.selectFirst(LINK_SELECTOR); 27 | final Element teaserElement = articleElement.selectFirst(TEASER_SELECTOR); 28 | if (filmUrlElement != null && isRelevant(teaserElement)) { 29 | String url = filmUrlElement.attr(ATTRIBUTE_HREF); 30 | url = UrlUtils.addDomainIfMissing(url, ZdfConstants.URL_BASE); 31 | results.add(new CrawlerUrlDTO(url)); 32 | } 33 | }); 34 | 35 | return results; 36 | } 37 | 38 | private boolean isRelevant(Element teaserElement) { 39 | if (teaserElement == null) { 40 | return true; 41 | } 42 | return !("ARD".equalsIgnoreCase(teaserElement.text()) || "funk".equalsIgnoreCase(teaserElement.text())); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/tasks/ZdfDayPageHtmlTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.tasks; 2 | 3 | import 
java.util.concurrent.ConcurrentLinkedQueue; 4 | import mServer.crawler.sender.MediathekReader; 5 | import mServer.crawler.sender.base.AbstractDocumentTask; 6 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 7 | import mServer.crawler.sender.base.CrawlerUrlDTO; 8 | import mServer.crawler.sender.base.JsoupConnection; 9 | import mServer.crawler.sender.zdf.parser.ZdfDayPageHtmlDeserializer; 10 | import org.jsoup.nodes.Document; 11 | 12 | public class ZdfDayPageHtmlTask extends AbstractDocumentTask { 13 | 14 | private final transient ZdfDayPageHtmlDeserializer deserializer; 15 | private final String apiUrlBase; 16 | private final transient JsoupConnection jsoupConnection; 17 | 18 | public ZdfDayPageHtmlTask( 19 | final String apiUrlBase, 20 | final MediathekReader crawler, 21 | final ConcurrentLinkedQueue urlToCrawlDTOs, 22 | final JsoupConnection jsoupConnection) { 23 | super(crawler, urlToCrawlDTOs); 24 | this.apiUrlBase = apiUrlBase; 25 | deserializer = new ZdfDayPageHtmlDeserializer(apiUrlBase); 26 | this.jsoupConnection = jsoupConnection; 27 | } 28 | 29 | @Override 30 | protected void processDocument(CrawlerUrlDTO aUrlDTO, Document aDocument) { 31 | taskResults.addAll(deserializer.deserialize(aDocument)); 32 | } 33 | 34 | @Override 35 | protected AbstractRecursivConverterTask createNewOwnInstance( 36 | ConcurrentLinkedQueue aElementsToProcess) { 37 | return new ZdfDayPageHtmlTask(apiUrlBase, crawler, aElementsToProcess, jsoupConnection); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/tasks/ZdfDayPageTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.tasks; 2 | 3 | import java.util.Optional; 4 | import java.util.concurrent.ConcurrentLinkedQueue; 5 | import jakarta.ws.rs.client.WebTarget; 6 | import mServer.crawler.sender.MediathekReader; 7 | import 
mServer.crawler.sender.base.AbstractRecursivConverterTask; 8 | import mServer.crawler.sender.base.CrawlerUrlDTO; 9 | import mServer.crawler.sender.zdf.json.ZdfDayPageDeserializer; 10 | import mServer.crawler.sender.zdf.json.ZdfDayPageDto; 11 | 12 | public class ZdfDayPageTask extends ZdfTaskBase { 13 | 14 | private final String apiUrlBase; 15 | 16 | public ZdfDayPageTask( 17 | final MediathekReader aCrawler, 18 | final String aApiUrlBase, 19 | final ConcurrentLinkedQueue aUrlToCrawlDtos, 20 | final Optional aAuthKey) { 21 | super(aCrawler, aUrlToCrawlDtos, aAuthKey); 22 | apiUrlBase = aApiUrlBase; 23 | registerJsonDeserializer(ZdfDayPageDto.class, new ZdfDayPageDeserializer(apiUrlBase)); 24 | } 25 | 26 | @Override 27 | protected void processRestTarget(final CrawlerUrlDTO aDto, final WebTarget aTarget) { 28 | final ZdfDayPageDto dto = deserialize(aTarget, ZdfDayPageDto.class); 29 | if (dto != null) { 30 | taskResults.addAll(dto.getEntries()); 31 | processNextPage(dto); 32 | } 33 | } 34 | 35 | @Override 36 | protected AbstractRecursivConverterTask createNewOwnInstance( 37 | final ConcurrentLinkedQueue aElementsToProcess) { 38 | return new ZdfDayPageTask(crawler, apiUrlBase, aElementsToProcess, authKey); 39 | } 40 | 41 | private void processNextPage(final ZdfDayPageDto entries) { 42 | if (entries.getNextPageUrl().isPresent() && !entries.getEntries().isEmpty()) { 43 | final ConcurrentLinkedQueue urls = new ConcurrentLinkedQueue<>(); 44 | urls.add(new CrawlerUrlDTO(entries.getNextPageUrl().get())); 45 | taskResults.addAll(createNewOwnInstance(urls).invoke()); 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/tasks/ZdfLetterListHtmlTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.tasks; 2 | 3 | import mServer.crawler.sender.MediathekReader; 4 | import 
mServer.crawler.sender.base.AbstractDocumentTask; 5 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 6 | import mServer.crawler.sender.base.CrawlerUrlDTO; 7 | import mServer.crawler.sender.zdf.parser.ZdfLetterListHtmlDeserializer; 8 | import org.jsoup.nodes.Document; 9 | 10 | import java.util.Queue; 11 | import java.util.concurrent.ConcurrentLinkedQueue; 12 | 13 | public class ZdfLetterListHtmlTask extends AbstractDocumentTask { 14 | 15 | private final transient ZdfLetterListHtmlDeserializer letterListDeserializer; 16 | 17 | public ZdfLetterListHtmlTask( 18 | final MediathekReader crawler, final ConcurrentLinkedQueue urlToCrawlDTOs) { 19 | super(crawler, urlToCrawlDTOs); 20 | letterListDeserializer = new ZdfLetterListHtmlDeserializer(); 21 | } 22 | 23 | @Override 24 | protected void processDocument(final CrawlerUrlDTO aUrlDTO, final Document aDocument) { 25 | taskResults.addAll(letterListDeserializer.deserialize(aDocument)); 26 | } 27 | 28 | @Override 29 | protected AbstractRecursivConverterTask createNewOwnInstance( 30 | final ConcurrentLinkedQueue aElementsToProcess) { 31 | return new ZdfLetterListHtmlTask(crawler, aElementsToProcess); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/tasks/ZdfTopicPageHtmlTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.tasks; 2 | 3 | import mServer.crawler.sender.MediathekReader; 4 | import mServer.crawler.sender.base.AbstractDocumentTask; 5 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 6 | import mServer.crawler.sender.base.CrawlerUrlDTO; 7 | import mServer.crawler.sender.zdf.ZdfConstants; 8 | import mServer.crawler.sender.zdf.parser.ZdfTopicPageHtmlDeserializer; 9 | import org.jsoup.nodes.Document; 10 | 11 | import java.util.concurrent.ConcurrentLinkedQueue; 12 | 13 | public class ZdfTopicPageHtmlTask extends 
AbstractDocumentTask { 14 | 15 | private final transient ZdfTopicPageHtmlDeserializer topicsDeserializer; 16 | 17 | public ZdfTopicPageHtmlTask( 18 | final MediathekReader crawler, final ConcurrentLinkedQueue urlToCrawlDTOs) { 19 | super(crawler, urlToCrawlDTOs); 20 | topicsDeserializer = new ZdfTopicPageHtmlDeserializer(ZdfConstants.URL_API_BASE); 21 | } 22 | 23 | @Override 24 | protected void processDocument(final CrawlerUrlDTO aUrlDTO, final Document aDocument) { 25 | taskResults.addAll(topicsDeserializer.deserialize(aDocument)); 26 | } 27 | 28 | @Override 29 | protected AbstractRecursivConverterTask createNewOwnInstance( 30 | final ConcurrentLinkedQueue aElementsToProcess) { 31 | return new ZdfTopicPageHtmlTask(crawler, aElementsToProcess); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/mServer/crawler/sender/zdf/tasks/ZdfTopicsPageHtmlTask.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.zdf.tasks; 2 | 3 | import mServer.crawler.sender.MediathekReader; 4 | import mServer.crawler.sender.base.AbstractDocumentTask; 5 | import mServer.crawler.sender.base.AbstractRecursivConverterTask; 6 | import mServer.crawler.sender.base.CrawlerUrlDTO; 7 | import mServer.crawler.sender.zdf.parser.ZdfTopicsPageHtmlDeserializer; 8 | import org.jsoup.nodes.Document; 9 | 10 | import java.util.concurrent.ConcurrentLinkedQueue; 11 | 12 | public class ZdfTopicsPageHtmlTask extends AbstractDocumentTask { 13 | 14 | private final transient ZdfTopicsPageHtmlDeserializer topicsDeserializer; 15 | 16 | public ZdfTopicsPageHtmlTask( 17 | final MediathekReader crawler, final ConcurrentLinkedQueue urlToCrawlDTOs) { 18 | super(crawler, urlToCrawlDTOs); 19 | topicsDeserializer = new ZdfTopicsPageHtmlDeserializer(); 20 | } 21 | 22 | @Override 23 | protected void processDocument(final CrawlerUrlDTO aUrlDTO, final Document aDocument) { 24 | 
taskResults.addAll(topicsDeserializer.deserialize(aDocument)); 25 | } 26 | 27 | @Override 28 | protected AbstractRecursivConverterTask createNewOwnInstance( 29 | final ConcurrentLinkedQueue aElementsToProcess) { 30 | return new ZdfTopicsPageHtmlTask(crawler, aElementsToProcess); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/mServer/daten/MserverListeSuchen.java: -------------------------------------------------------------------------------- 1 | /* 2 | * MediathekView 3 | * Copyright (C) 2008 W. Xaver 4 | * W.Xaver[at]googlemail.com 5 | * http://zdfmediathk.sourceforge.net/ 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program. If not, see . 
19 | */ 20 | package mServer.daten; 21 | 22 | import java.util.Date; 23 | import java.util.LinkedList; 24 | 25 | public class MserverListeSuchen extends LinkedList { 26 | 27 | private static final long serialVersionUID = 1L; 28 | 29 | @Override 30 | public boolean add(MserverSearchTask d) { 31 | if (d.sofortSuchen()) { 32 | addFirst(d); 33 | return true; 34 | } 35 | // nach Datum sortiert, einfügen 36 | for (int i = 0; i < this.size(); ++i) { 37 | MserverSearchTask ds = this.get(i); 38 | if (ds.spaeter(d)) { 39 | add(i, d); 40 | return true; 41 | } 42 | } 43 | return super.add(d); 44 | } 45 | 46 | public MserverSearchTask erste() { 47 | // liefert den ersten Job der in der Zukunft liegt 48 | Date now = new Date(); 49 | MserverSearchTask akt = null; 50 | while ((akt = this.poll()) != null) { 51 | if (akt.sofortSuchen()) { 52 | return akt; 53 | } 54 | Date d = akt.getTimeHeute(); 55 | if (d.compareTo(now) >= 0) { 56 | return akt; 57 | } 58 | } 59 | return akt; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/mServer/daten/MserverListeUpload.java: -------------------------------------------------------------------------------- 1 | /* 2 | * MediathekView 3 | * Copyright (C) 2008 W. Xaver 4 | * W.Xaver[at]googlemail.com 5 | * http://zdfmediathk.sourceforge.net/ 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program. If not, see . 
/**
 * Compares two calendars by their date part only (year, month, day of month);
 * the time of day is ignored.
 */
public class DateWithoutTimeComparer {

  private DateWithoutTimeComparer() {
    // static helper, not meant to be instantiated
  }

  /**
   * Compares year, then month, then day of month.
   *
   * @param c1 first calendar
   * @param c2 second calendar
   * @return the difference ({@code c1} minus {@code c2}) of the first of these
   *     fields that differs, or {@code 0} when all three are equal
   */
  public static int compare(Calendar c1, Calendar c2) {
    final int[] fieldsByPriority = {Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH};
    int difference = 0;
    for (final int field : fieldsByPriority) {
      difference = c1.get(field) - c2.get(field);
      if (difference != 0) {
        break;
      }
    }
    return difference;
  }
}
/**
 * Singleton that reads the metric related environment variables once, when the
 * class is initialized, and exposes them as fields.
 */
public class EnvManager {

  private static final String STRING_ENV_KEY_ENABLED = "METRIC_ENABLED";
  private static final String STRING_ENV_KEY_TELEGRAFURL = "METRIC_TELEGRAFURL";

  private static final EnvManager instance = new EnvManager();

  /** {@code true} when METRIC_ENABLED holds one of: y, yes, true (any case) or 1. */
  public boolean envMetricEnabled;

  /** Value of METRIC_TELEGRAFURL, or the empty string when the variable is unset. */
  public String envMetricUrl;

  private EnvManager() {
    this.envMetricEnabled = isEnvSet(STRING_ENV_KEY_ENABLED);
    this.envMetricUrl = getEnvValue(STRING_ENV_KEY_TELEGRAFURL);
  }

  /**
   * @return the single shared instance
   */
  public static EnvManager getInstance() {
    return instance;
  }

  /**
   * Tells whether the named variable carries a value that means "switched on".
   */
  private boolean isEnvSet(String envName) {
    final String raw = System.getenv(envName);
    if (raw == null) {
      return false;
    }
    if ("1".equals(raw)) {
      return true;
    }
    return "y".equalsIgnoreCase(raw)
        || "yes".equalsIgnoreCase(raw)
        || "true".equalsIgnoreCase(raw);
  }

  /**
   * Reads the named variable, mapping an unset variable to the empty string.
   */
  private String getEnvValue(String envName) {
    final String raw = System.getenv(envName);
    return raw != null ? raw : "";
  }

}
14 | * @author nicklas 15 | * 16 | */ 17 | public class HashFileWriter { 18 | private static final String FILE_NAME = "filmliste.id"; 19 | private Path baseDir; 20 | 21 | /** 22 | * @param baseDirPath Der Pfad zum Verzeichnis in das geschrieben werden soll. 23 | */ 24 | public HashFileWriter(String baseDirPath) { 25 | baseDir = Paths.get(baseDirPath); 26 | } 27 | 28 | /** 29 | * Schreibt die gegebene ID in die Filmlist Hash Datei. 30 | * @param id Die zu schreibende ID. 31 | */ 32 | public void writeHash(String id) { 33 | try (BufferedWriter fileWriter = Files.newBufferedWriter(baseDir.resolve(FILE_NAME), StandardCharsets.UTF_8)) { 34 | fileWriter.write(id); 35 | } catch (IOException ioException) { 36 | Log.errorLog(494461668, ioException, "Der Filmlisten Hash konnte nicht geschrieben werden."); 37 | } 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/mServer/tool/MserverTimer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * MediathekView 3 | * Copyright (C) 2008 W. Xaver 4 | * W.Xaver[at]googlemail.com 5 | * http://zdfmediathk.sourceforge.net/ 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program. If not, see . 
19 | */ 20 | package mServer.tool; 21 | 22 | import java.util.concurrent.TimeUnit; 23 | 24 | import mServer.MServer; 25 | 26 | public class MserverTimer extends Thread { 27 | 28 | private final MServer mserver; 29 | 30 | public MserverTimer(MServer mserver) { 31 | this.mserver = mserver; 32 | setName("MServerTimer"); 33 | } 34 | 35 | public void ping() { 36 | if (!mserver.isSuchen()) { 37 | // nicht beschäftigt 38 | mserver.laufen(); 39 | } 40 | } 41 | 42 | @Override 43 | public synchronized void run() { 44 | while (true) { 45 | ping(); 46 | // let's stop when there was an interrupt 47 | if (Thread.currentThread().isInterrupted()) { 48 | break; 49 | } 50 | schlafen(); 51 | } 52 | } 53 | 54 | private void schlafen() { 55 | try { 56 | TimeUnit.SECONDS.sleep(10); 57 | } catch (InterruptedException ignored) { 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/mServer/tool/MserverWarten.java: -------------------------------------------------------------------------------- 1 | /* 2 | * MediathekView 3 | * Copyright (C) 2008 W. Xaver 4 | * W.Xaver[at]googlemail.com 5 | * http://zdfmediathk.sourceforge.net/ 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program. If not, see . 
19 | */ 20 | package mServer.tool; 21 | 22 | import java.util.concurrent.TimeUnit; 23 | 24 | /** 25 | * 26 | * @author emil 27 | */ 28 | public class MserverWarten { 29 | 30 | public synchronized void sekundenWarten(int sekunden) { 31 | MserverLog.systemMeldung("Warten: " + String.valueOf(sekunden) + " Sekunden"); 32 | try { 33 | while (sekunden > 0) { 34 | TimeUnit.SECONDS.sleep(1); 35 | sekunden--; 36 | System.out.print("\r"); 37 | System.out.print(String.valueOf(sekunden)); 38 | } 39 | } catch (Exception ex) { 40 | MserverLog.fehlerMeldung(347895642, MserverWarten.class.getName(), "Warten nach dem Suchen", ex); 41 | } 42 | System.out.println(""); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 14 | 15 | 16 | 17 | %d{HH:mm:ss} %m%n 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /src/main/resources/mServer/crawler/gui/PanelDel.fxml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /src/main/resources/mServer/crawler/gui/PanelTool.fxml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 33 | 38 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- 
/src/main/resources/mServer/crawler/gui/msearchgui.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Empty Stylesheet file. 3 | */ 4 | 5 | #fxPaneFilmList { 6 | -fx-background-color: #cccccc; 7 | -fx-background-radius: 10; 8 | -fx-background-insets: -5; 9 | -fx-padding: 10; 10 | -fx-border-radius: 10; 11 | -fx-border-color: #666666; 12 | -fx-border-width: 3; 13 | -fx-effect: dropshadow(gaussian, #665555, 10, 0, 3, 3); 14 | } 15 | -------------------------------------------------------------------------------- /src/main/resources/mServer/crawler/gui/paneldel.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Empty Stylesheet file. 3 | */ 4 | 5 | .mainFxmlClass { 6 | 7 | } 8 | -------------------------------------------------------------------------------- /src/main/resources/mServer/crawler/gui/panelsearch.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Empty Stylesheet file. 3 | */ 4 | 5 | .mainFxmlClass { 6 | 7 | } 8 | -------------------------------------------------------------------------------- /src/main/resources/mServer/crawler/gui/paneltool.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Empty Stylesheet file. 
3 | */ 4 | 5 | .mainFxmlClass { 6 | 7 | } 8 | -------------------------------------------------------------------------------- /src/test/developTest/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializerTest.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | import com.google.gson.JsonObject; 4 | import mServer.test.JsonFileReader; 5 | import org.hamcrest.Matchers; 6 | import org.junit.Test; 7 | import org.junit.runner.RunWith; 8 | import org.junit.runners.Parameterized; 9 | 10 | import java.util.Arrays; 11 | import java.util.Collection; 12 | import java.util.Set; 13 | 14 | import static org.hamcrest.CoreMatchers.equalTo; 15 | import static org.hamcrest.CoreMatchers.notNullValue; 16 | import static org.junit.Assert.assertThat; 17 | 18 | @RunWith(Parameterized.class) 19 | public class ArteCategoryFilmListDeserializerTest { 20 | 21 | private final String jsonFile; 22 | private final String[] expectedProgramIds; 23 | private final boolean expectedHasNextPage; 24 | private final String expectedNextPageUrl; 25 | private final ArteCategoryFilmListDeserializer target; 26 | public ArteCategoryFilmListDeserializerTest(String aJsonFile, String[] aProgramIds, boolean aNextPage, String nextPageUrl) { 27 | jsonFile = aJsonFile; 28 | expectedProgramIds = aProgramIds; 29 | expectedHasNextPage = aNextPage; 30 | expectedNextPageUrl = nextPageUrl; 31 | this.target = new ArteCategoryFilmListDeserializer(); 32 | } 33 | 34 | @Parameterized.Parameters 35 | public static Collection data() { 36 | return Arrays.asList(new Object[][]{ 37 | {"/arte/arte_category.json", new String[]{"112511-000-A", "047389-000-A", "109066-000-A", "082669-000-A", "003982-000-A", "021109-000-A"}, false, null}, 38 | {"/arte/arte_video_list1.json", new String[]{"033559-000-A","078154-000-A", "101398-000-A", "109332-000-A", "111063-000-A"}, true, 
"https://www.arte.tv/api/rproxy/emac/v4/de/web/zones/daeadc71-4306-411a-8590-1c1f484ef5aa/content?abv=B&authorizedCountry=DE&page=2&pageId=MOST_RECENT&zoneIndexInPage=0"} 39 | }); 40 | } 41 | 42 | @Test 43 | public void testDeserialize() { 44 | 45 | JsonObject jsonObject = JsonFileReader.readJson(jsonFile); 46 | 47 | ArteCategoryFilmsDTO actual = target.deserialize(jsonObject, ArteCategoryFilmsDTO.class, null); 48 | 49 | assertThat(actual, notNullValue()); 50 | assertThat(actual.hasNextPage(), equalTo(expectedHasNextPage)); 51 | Set actualProgramIds = actual.getProgramIds(); 52 | assertThat(actualProgramIds, Matchers.containsInAnyOrder(expectedProgramIds)); 53 | assertThat(actual.getNextPageUrl(), equalTo(expectedNextPageUrl)); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/test/developTest/java/mServer/crawler/sender/arte/ArteSubPageDeserializerTest.java: -------------------------------------------------------------------------------- 1 | package mServer.crawler.sender.arte; 2 | 3 | import com.google.gson.JsonObject; 4 | import mServer.test.JsonFileReader; 5 | import org.hamcrest.Matchers; 6 | import org.junit.Test; 7 | import org.junit.runner.RunWith; 8 | import org.junit.runners.Parameterized; 9 | 10 | import java.util.Arrays; 11 | import java.util.Collection; 12 | import java.util.Set; 13 | 14 | import static org.hamcrest.CoreMatchers.equalTo; 15 | import static org.hamcrest.CoreMatchers.notNullValue; 16 | import static org.junit.Assert.assertThat; 17 | 18 | @RunWith(Parameterized.class) 19 | public class ArteSubPageDeserializerTest { 20 | 21 | private final String jsonFile; 22 | private final String[] expectedProgramIds; 23 | private final boolean expectedHasNextPage; 24 | private final String expectedNextPageUrl; 25 | private final ArteSubPageDeserializer target; 26 | public ArteSubPageDeserializerTest(String aJsonFile, String[] aProgramIds, boolean aNextPage, String nextPageUrl) { 27 | jsonFile = 
aJsonFile; 28 | expectedProgramIds = aProgramIds; 29 | expectedHasNextPage = aNextPage; 30 | expectedNextPageUrl = nextPageUrl; 31 | this.target = new ArteSubPageDeserializer(); 32 | } 33 | 34 | @Parameterized.Parameters 35 | public static Collection data() { 36 | return Arrays.asList(new Object[][]{ 37 | {"/arte/arte_video_list2.json", new String[]{"099708-000-A", "098846-000-A", "111648-001-A", "112235-000-A", "113043-139-A"}, true, "https://www.arte.tv/api/rproxy/emac/v4/de/web/zones/82b597d7-a83b-4dd8-bea8-ad71675fdf23/content?abv=A&authorizedCountry=DE&page=3&pageId=MOST_VIEWED&zoneIndexInPage=0"}, 38 | {"/arte/arte_video_list_last.json", new String[]{"102805-000-A","104017-000-A", "106273-006-A"}, false, null} 39 | }); 40 | } 41 | 42 | @Test 43 | public void testDeserialize() { 44 | 45 | JsonObject jsonObject = JsonFileReader.readJson(jsonFile); 46 | 47 | ArteCategoryFilmsDTO actual = target.deserialize(jsonObject, ArteCategoryFilmsDTO.class, null); 48 | 49 | assertThat(actual, notNullValue()); 50 | assertThat(actual.hasNextPage(), equalTo(expectedHasNextPage)); 51 | Set actualProgramIds = actual.getProgramIds(); 52 | assertThat(actualProgramIds, Matchers.containsInAnyOrder(expectedProgramIds)); 53 | assertThat(actual.getNextPageUrl(), equalTo(expectedNextPageUrl)); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/test/developTest/java/mServer/test/JsonFileReader.java: -------------------------------------------------------------------------------- 1 | package mServer.test; 2 | 3 | import com.google.gson.Gson; 4 | import com.google.gson.JsonArray; 5 | import com.google.gson.JsonObject; 6 | import com.google.gson.JsonSyntaxException; 7 | import java.io.IOException; 8 | import java.net.URI; 9 | import java.net.URISyntaxException; 10 | import java.nio.file.Files; 11 | import java.nio.file.Path; 12 | import java.nio.file.Paths; 13 | import static org.junit.Assert.fail; 14 | 15 | /** 16 | * Reads a json file 
17 | */ 18 | public class JsonFileReader { 19 | 20 | private JsonFileReader() {} 21 | 22 | public static JsonObject readJson(String filePath) { 23 | try { 24 | URI u = JsonFileReader.class.getResource(filePath).toURI(); 25 | Path path = Paths.get(u); 26 | String jsonOutput = new String(Files.readAllBytes(path)); 27 | return new Gson().fromJson(jsonOutput, JsonObject.class); 28 | } catch(JsonSyntaxException | IOException | URISyntaxException ex) { 29 | fail("Exception reading jsonFile " + filePath + ": " + ex.getMessage()); 30 | } 31 | return null; 32 | } 33 | 34 | public static JsonArray readJsonArray(String filePath) { 35 | try { 36 | URI u = JsonFileReader.class.getResource(filePath).toURI(); 37 | Path path = Paths.get(u); 38 | String jsonOutput = new String(Files.readAllBytes(path)); 39 | return new Gson().fromJson(jsonOutput, JsonArray.class); 40 | } catch(JsonSyntaxException | IOException | URISyntaxException ex) { 41 | fail("Exception reading jsonFile " + filePath + ": " + ex.getMessage()); 42 | } 43 | return null; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/test/developTest/java/mServer/test/TestFileReader.java: -------------------------------------------------------------------------------- 1 | package mServer.test; 2 | 3 | import java.io.IOException; 4 | import java.net.URI; 5 | import java.net.URISyntaxException; 6 | import java.nio.file.Files; 7 | import java.nio.file.Path; 8 | import java.nio.file.Paths; 9 | import static org.junit.Assert.fail; 10 | 11 | public class TestFileReader { 12 | private TestFileReader() {} 13 | 14 | public static String readFile(String filePath) { 15 | try { 16 | URI u = TestFileReader.class.getResource(filePath).toURI(); 17 | Path path = Paths.get(u); 18 | return new String(Files.readAllBytes(path)); 19 | } catch(IOException | URISyntaxException ex) { 20 | fail("Exception reading file " + filePath + ": " + ex.getMessage()); 21 | } 22 | return null; 23 | } 24 | } 25 
| -------------------------------------------------------------------------------- /src/test/developTest/java/mServer/tool/HashFileWriterTest.java: -------------------------------------------------------------------------------- 1 | package mServer.tool; 2 | 3 | import java.io.IOException; 4 | import java.nio.charset.StandardCharsets; 5 | import java.nio.file.Files; 6 | import java.nio.file.Path; 7 | import java.nio.file.Paths; 8 | import java.time.OffsetDateTime; 9 | 10 | import org.hamcrest.Matchers; 11 | import org.junit.After; 12 | import org.junit.Assert; 13 | import org.junit.Test; 14 | 15 | public class HashFileWriterTest { 16 | private static final String FILE_NAME_FILMLISTE_HASH = "filmliste.id"; 17 | //private static final Path basePath = Paths.get(HashFileWriterTest.class.getResource("").getPath()); 18 | private static final Path basePath = Paths.get("."); 19 | 20 | @Test 21 | public void testWriteHash() throws IOException { 22 | String id = OffsetDateTime.now().toInstant().toString(); 23 | new HashFileWriter(basePath.toString()).writeHash(id); 24 | Assert.assertThat("Das schreiben der Test Filmlisten ID hat nicht geklappt.", 25 | Files.readAllLines(basePath.resolve(FILE_NAME_FILMLISTE_HASH), StandardCharsets.UTF_8).get(0), 26 | Matchers.equalTo(id)); 27 | } 28 | 29 | @After 30 | public void deleteIfExist() throws IOException { 31 | Path filmlistIdPath = basePath.resolve(FILE_NAME_FILMLISTE_HASH); 32 | if (Files.exists(filmlistIdPath)) { 33 | Files.delete(filmlistIdPath); 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/test/performanceTest/java/PerformanceTest.java: -------------------------------------------------------------------------------- 1 | import etm.core.configuration.BasicEtmConfigurator; 2 | import etm.core.configuration.EtmManager; 3 | import etm.core.monitor.EtmMonitor; 4 | import etm.core.renderer.SimpleTextRenderer; 5 | import mServer.Main; 6 | import org.junit.After; 7 | 
import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | import java.net.URISyntaxException; 11 | import java.nio.file.Path; 12 | import java.nio.file.Paths; 13 | 14 | /** 15 | * A test to test the performance of the crawler. 16 | */ 17 | public class PerformanceTest 18 | { 19 | private static final String TEST_REOSURCES_FOLDERPATH = "/"; 20 | private EtmMonitor performanceMonitor; 21 | private Path testConfigPath; 22 | 23 | @Before 24 | public void setUp() throws URISyntaxException 25 | { 26 | BasicEtmConfigurator.configure(); 27 | performanceMonitor = EtmManager.getEtmMonitor(); 28 | performanceMonitor.start(); 29 | 30 | testConfigPath = Paths.get(getClass().getResource(TEST_REOSURCES_FOLDERPATH).toURI()); 31 | } 32 | 33 | @After 34 | public void tearDown() 35 | { 36 | performanceMonitor.stop(); 37 | } 38 | 39 | @Test 40 | public void testCrawlerPerformance() 41 | { 42 | Main.main(new String[]{testConfigPath.toAbsolutePath().toString()}); 43 | performanceMonitor.render(new SimpleTextRenderer()); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/test/performanceTest/resources/mserver.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | MServer 5 | 6 | 7 | http://verteiler1.mediathekview.de/Filmliste-diff.xz 8 | 9 | 10 | 11 | http://verteiler1.mediathekview.de/Filmliste-akt.xz 12 | 13 | 14 | live-streams.json 15 | 16 | 20 | 21 | 22 | 23 | filme-org.xz 24 | 25 | 26 | 27 | 1 28 | 29 | 30 | 31 | 32 | 33 | 34 | lang 35 | 36 | 37 | neu 38 | 39 | 40 | sofort 41 | 42 | 43 | 60 44 | 45 | 46 | true 47 | 48 | 49 | 50 | 51 | 58 | 59 | 60 | --------------------------------------------------------------------------------