├── .gitmodules ├── PatreonDownloader.App ├── Enums │ └── LogLevel.cs ├── settings.json ├── Properties │ ├── PublishProfiles │ │ ├── net3.1-win-x64-release.pubxml │ │ └── net3.1-linux-x64-release.pubxml │ └── AssemblyInfo.cs ├── PatreonDownloader.App.csproj ├── UpdateChecker.cs ├── NlogManager.cs ├── Models │ └── CommandLineOptions.cs └── Program.cs ├── PatreonDownloader.Implementation ├── Enums │ └── PatreonCrawledUrlType.cs ├── ParsingResult.cs ├── Interfaces │ └── IRemoteFilenameRetriever.cs ├── Models │ ├── JSONObjects │ │ ├── Campaign.cs │ │ └── Posts.cs │ └── PatreonDownloaderSettings.cs ├── PatreonDownloader.Implementation.csproj ├── Helpers │ ├── HashHelper.cs │ └── PostSubdirectoryHelper.cs ├── PatreonWebDownloader.cs ├── PatreonCrawlTargetInfo.cs ├── Properties │ └── AssemblyInfo.cs ├── PatreonDownloaderModule.cs ├── PatreonCookieValidator.cs ├── PatreonCrawledUrl.cs ├── PatreonRemoteFilenameRetriever.cs ├── PatreonCrawlTargetInfoRetriever.cs ├── PatreonDefaultPlugin.cs ├── PatreonCrawledUrlProcessor.cs └── PatreonPageCrawler.cs ├── docs ├── MEGA.md ├── GOOGLEDRIVE.md ├── REMOTEBROWSER.md └── BUILDING.md ├── .github └── FUNDING.yml ├── PatreonDownloader.Tests ├── PatreonDownloader.Tests.csproj ├── PostSubdirectoryHelperTests.cs └── PatreonCrawledUrlProcessorTests.cs ├── LICENSE.md ├── .gitattributes ├── README.md ├── PatreonDownloader.sln └── .gitignore /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/UniversalDownloaderPlatform"] 2 | path = submodules/UniversalDownloaderPlatform 3 | url = https://github.com/AlexCSDev/UniversalDownloaderPlatform.git 4 | branch = master 5 | -------------------------------------------------------------------------------- /PatreonDownloader.App/Enums/LogLevel.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using 
System.Threading.Tasks; 6 | 7 | namespace PatreonDownloader.App.Enums 8 | { 9 | internal enum LogLevel 10 | { 11 | Default, 12 | Debug, 13 | Trace 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/Enums/PatreonCrawledUrlType.cs: -------------------------------------------------------------------------------- 1 | namespace PatreonDownloader.Implementation.Enums 2 | { 3 | public enum PatreonCrawledUrlType 4 | { 5 | Unknown, 6 | PostFile, 7 | PostAttachment, 8 | PostMedia, 9 | ExternalUrl, 10 | CoverFile, 11 | AvatarFile 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /docs/MEGA.md: -------------------------------------------------------------------------------- 1 | # Configuring Mega.nz plugin 2 | This plugin works without any additional configuration, subject to all download limits enforced by the service. 3 | 4 | If you have a premium account, you can configure it by renaming mega_credentials_example.json to mega_credentials.json and setting your email and password there. An invalid email or password will prevent Mega files from being downloaded. 
-------------------------------------------------------------------------------- /PatreonDownloader.App/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "UrlBlackList": "patreon.com/posts/|tmblr.co/|t.umblr.com/redirect|mailto:|postybirb.com|picarto.tv|deviantart.com|https://twitter.com|https://steamcommunity.com|http://www.furaffinity.net|https://e621.net/post/show|https://e621.net/posts/|trello.com|https://smutba.se|https://sfmlab.com|http://fav.me|https://inkbunny.net|https://www.pixiv.net/|pixiv.me|https://x.com|https://www.x.com|http://x.com|http://www.x.com" 3 | } -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/ParsingResult.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace PatreonDownloader.Implementation 4 | { 5 | /// 6 | /// Represents one crawled page with all results and link to the next page 7 | /// 8 | internal class ParsingResult 9 | { 10 | public List CrawledUrls { get; set; } 11 | public string NextPage { get; set; } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/Interfaces/IRemoteFilenameRetriever.cs: -------------------------------------------------------------------------------- 1 | using System.Threading.Tasks; 2 | using UniversalDownloaderPlatform.Common.Interfaces.Models; 3 | 4 | namespace PatreonDownloader.Implementation.Interfaces 5 | { 6 | interface IRemoteFilenameRetriever 7 | { 8 | /// 9 | /// Initialization function, called on every PatreonDownloader.Download call 10 | /// 11 | /// 12 | Task BeforeStart(IUniversalDownloaderPlatformSettings settings); 13 | Task RetrieveRemoteFileName(string url); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /docs/GOOGLEDRIVE.md: 
-------------------------------------------------------------------------------- 1 | # Configuring Google Drive plugin 2 | In order to use the Google Drive plugin you need to configure a few things: 3 | 4 | 1. Go to https://developers.google.com/drive/api/v3/quickstart/dotnet 5 | 2. Press the "Enable the Drive API" button 6 | 3. Select "Desktop app" and press "Create" 7 | 4. Press the "DOWNLOAD CLIENT CONFIGURATION" button 8 | 5. Rename the downloaded file to gd_credentials.json and put it into the PatreonDownloader folder 9 | 6. When needed, PatreonDownloader will open an authorization dialog where you will need to log in to your Google Drive account and allow the "Quickstart" application to access your account. **Don't worry, no one will be able to access your account data as you are using your own client credentials.** -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/Models/JSONObjects/Campaign.cs: -------------------------------------------------------------------------------- 1 | using Newtonsoft.Json; 2 | 3 | // This file contains all classes used for representing deserialized json response of "campaign" api endpoint 4 | namespace PatreonDownloader.Implementation.Models.JSONObjects.Campaign 5 | { 6 | public class Attributes 7 | { 8 | [JsonProperty("avatar_photo_url")] 9 | public string AvatarUrl; 10 | 11 | [JsonProperty("cover_photo_url")] 12 | public string CoverUrl; 13 | 14 | [JsonProperty("name")] 15 | public string Name; 16 | } 17 | 18 | public class Data 19 | { 20 | [JsonProperty("attributes")] 21 | public Attributes Attributes; 22 | } 23 | 24 | public class Root 25 | { 26 | [JsonProperty("data")] 27 | public Data Data; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up 
to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: alexcsdev # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /PatreonDownloader.App/Properties/PublishProfiles/net3.1-win-x64-release.pubxml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | FileSystem 8 | Release 9 | Any CPU 10 | netcoreapp3.1 11 | bin\publish\net3.1-win-x64-release 12 | win-x64 13 | true 14 | <_IsPortable>false 15 | False 16 | False 17 | False 18 | 19 | -------------------------------------------------------------------------------- /PatreonDownloader.App/Properties/PublishProfiles/net3.1-linux-x64-release.pubxml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | FileSystem 8 | Release 9 | Any CPU 10 | netcoreapp3.1 11 | bin\publish\net3.1-linux-x64-release 12 | linux-x64 13 | true 14 | <_IsPortable>false 15 | False 16 | False 17 | False 18 | 19 | -------------------------------------------------------------------------------- /PatreonDownloader.App/PatreonDownloader.App.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net9.0 6 | false 7 | true 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 
| 18 | 19 | 20 | 21 | 22 | PreserveNewest 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/PatreonDownloader.Implementation.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net9.0 5 | false 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /PatreonDownloader.Tests/PatreonDownloader.Tests.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net9.0 5 | 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | runtime; build; native; contentfiles; analyzers; buildtransitive 14 | all 15 | 16 | 17 | runtime; build; native; contentfiles; analyzers; buildtransitive 18 | all 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019-2024 Aleksey Tsutsey & Contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;

namespace PatreonDownloader.Implementation.Helpers
{
    /// <summary>
    /// Hashing utilities.
    /// </summary>
    internal static class HashHelper
    {
        /// <summary>
        /// Computes the SHA-256 digest of the UTF-8 encoding of <paramref name="rawData"/>.
        /// </summary>
        /// <param name="rawData">Text to hash.</param>
        /// <returns>The digest as a 64-character lowercase hexadecimal string.</returns>
        public static string ComputeSha256Hash(string rawData)
        {
            byte[] utf8Payload = Encoding.UTF8.GetBytes(rawData);
            byte[] digest = SHA256.HashData(utf8Payload);

            // Convert.ToHexString emits upper-case digits; the established output format is lower-case ("x2").
            return Convert.ToHexString(digest).ToLowerInvariant();
        }
    }
}
"https://alexcsdev.github.io/pd_version.txt"; 15 | public UpdateChecker() 16 | { 17 | _httpClient = new HttpClient(); 18 | } 19 | 20 | public async Task<(bool, string)> IsNewVersionAvailable() 21 | { 22 | string[] remoteVersionData = (await _httpClient.GetStringAsync(UpdateUrl)).Split("|"); 23 | string remoteVersion = remoteVersionData[0]; 24 | string message = remoteVersionData.Length > 1 ? remoteVersionData[1] : null; 25 | Version currentVersion = Assembly.GetEntryAssembly().GetName().Version; 26 | 27 | return (remoteVersion != currentVersion.Major.ToString(), !string.IsNullOrWhiteSpace(message) ? message : null); 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/PatreonWebDownloader.cs: -------------------------------------------------------------------------------- 1 | using System.Threading.Tasks; 2 | using UniversalDownloaderPlatform.Common.Interfaces; 3 | using UniversalDownloaderPlatform.DefaultImplementations; 4 | using UniversalDownloaderPlatform.DefaultImplementations.Interfaces; 5 | 6 | namespace PatreonDownloader.Implementation 7 | { 8 | internal class PatreonWebDownloader : WebDownloader 9 | { 10 | public PatreonWebDownloader(IRemoteFileSizeChecker remoteFileSizeChecker, ICaptchaSolver captchaSolver) : base(remoteFileSizeChecker, captchaSolver) 11 | { 12 | 13 | } 14 | 15 | public override async Task DownloadFile(string url, string path, string refererUrl = null) 16 | { 17 | if (string.IsNullOrWhiteSpace(refererUrl)) 18 | refererUrl = "https://www.patreon.com"; 19 | 20 | 21 | await base.DownloadFile(url, path, refererUrl); 22 | } 23 | 24 | public override async Task DownloadString(string url, string refererUrl = null) 25 | { 26 | if (string.IsNullOrWhiteSpace(refererUrl)) 27 | refererUrl = "https://www.patreon.com"; 28 | 29 | 30 | return await base.DownloadString(url, refererUrl); 31 | } 32 | } 33 | } 34 | 
using System.Collections.Generic;
using System.IO;
using UniversalDownloaderPlatform.Common.Interfaces.Models;

namespace PatreonDownloader.Implementation
{
    /// <summary>
    /// Describes a crawl target: its id, avatar/cover urls, display name and the
    /// directory name derived from that display name.
    /// </summary>
    public class PatreonCrawlTargetInfo : ICrawlTargetInfo
    {
        // Characters reported by the current platform as invalid in file names.
        private static readonly HashSet<char> InvalidFilenameCharacters;

        static PatreonCrawlTargetInfo()
        {
            InvalidFilenameCharacters = new HashSet<char>(Path.GetInvalidFileNameChars());
        }

        public long Id { get; set; }
        public string AvatarUrl { get; set; }
        public string CoverUrl { get; set; }

        private string _name;

        /// <summary>
        /// Display name. Setting it also recomputes <see cref="SaveDirectory"/>.
        /// A null name yields a null <see cref="SaveDirectory"/> instead of throwing.
        /// </summary>
        public string Name
        {
            get => _name;
            set
            {
                _name = value;
                // Previously a null value caused a NullReferenceException inside the replace loop.
                _saveDirectory = value == null ? null : ReplaceInvalidFilenameCharacters(value);
            }
        }

        private string _saveDirectory;

        /// <summary>
        /// <see cref="Name"/> with every character that is invalid in a file name replaced by '_'.
        /// </summary>
        public string SaveDirectory => _saveDirectory;

        /// <summary>
        /// Replaces every invalid file name character in <paramref name="name"/> with '_' in a single pass.
        /// </summary>
        private static string ReplaceInvalidFilenameCharacters(string name)
        {
            char[] characters = name.ToCharArray();
            for (int i = 0; i < characters.Length; i++)
            {
                if (InvalidFilenameCharacters.Contains(characters[i]))
                    characters[i] = '_';
            }

            return new string(characters);
        }
    }
}
8 | [assembly: AssemblyTitle("Patreon Downloader")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("Patreon Downloader")] 13 | [assembly: AssemblyCopyright("Copyright 2019-2025 Aleksey Tsutsey & Contributors")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("69b6f4d5-111d-4b42-990b-07db351d8265")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | [assembly: AssemblyVersion("30.0.0.0")] 33 | [assembly: AssemblyFileVersion("30.0.0.0")] -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // In SDK-style projects such as this one, several assembly attributes that were historically 6 | // defined in this file are now automatically added during build and populated with 7 | // values defined in project properties. For details of which attributes are included 8 | // and how to customise this process see: https://aka.ms/assembly-info-properties 9 | 10 | // General Information about an assembly is controlled through the following 11 | // set of attributes. 
Change these attribute values to modify the information 12 | // associated with an assembly. 13 | [assembly: AssemblyTitle("Patreon Downloader Implementation Library")] 14 | [assembly: AssemblyDescription("")] 15 | [assembly: AssemblyConfiguration("")] 16 | [assembly: AssemblyCompany("")] 17 | [assembly: AssemblyProduct("Patreon Downloader")] 18 | [assembly: AssemblyCopyright("Copyright 2019-2025 Aleksey Tsutsey & Contributors")] 19 | [assembly: AssemblyTrademark("")] 20 | [assembly: AssemblyCulture("")] 21 | 22 | 23 | // Setting ComVisible to false makes the types in this assembly not visible to COM 24 | // components. If you need to access a type in this assembly from COM, set the ComVisible 25 | // attribute to true on that type. 26 | 27 | [assembly: ComVisible(false)] 28 | 29 | // The following GUID is for the ID of the typelib if this project is exposed to COM. 30 | 31 | [assembly: Guid("11fe4289-dd12-4f48-a571-938e4261f26d")] 32 | 33 | [assembly: InternalsVisibleTo("PatreonDownloader.Tests")] -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/PatreonDownloaderModule.cs: -------------------------------------------------------------------------------- 1 | using Ninject; 2 | using Ninject.Modules; 3 | using PatreonDownloader.Engine; 4 | using PatreonDownloader.Implementation.Interfaces; 5 | using PatreonDownloader.Implementation.Models; 6 | using UniversalDownloaderPlatform.Common.Interfaces; 7 | using UniversalDownloaderPlatform.Common.Interfaces.Models; 8 | using UniversalDownloaderPlatform.Common.Interfaces.Plugins; 9 | using UniversalDownloaderPlatform.DefaultImplementations; 10 | using UniversalDownloaderPlatform.DefaultImplementations.Interfaces; 11 | using UniversalDownloaderPlatform.PuppeteerEngine; 12 | 13 | namespace PatreonDownloader.Implementation 14 | { 15 | public class PatreonDownloaderModule : NinjectModule 16 | { 17 | public override void Load() 18 | { 19 | Kernel.Load(new 
using System;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using UniversalDownloaderPlatform.Common.Enums;
using UniversalDownloaderPlatform.Common.Helpers;

namespace PatreonDownloader.Implementation
{
    /// <summary>
    /// Helper used to generate name for post subdirectories
    /// </summary>
    internal class PostSubdirectoryHelper
    {
        /// <summary>
        /// Create a sanitized directory name based on supplied name pattern
        /// </summary>
        /// <param name="crawledUrl">Crawled url with published date, post title and post id</param>
        /// <param name="pattern">
        /// Pattern for directory name. Supports the placeholders %PublishedAt%, %PostTitle% and %PostId%,
        /// matched case-insensitively. Any other text in the pattern is kept exactly as written.
        /// </param>
        /// <param name="lengthLimit">Limit the directory name length to this amount of characters</param>
        /// <returns>The expanded pattern, truncated to the limit (ending in '~' when truncated) and passed through PathSanitizer.SanitizePath</returns>
        public static string CreateNameFromPattern(PatreonCrawledUrl crawledUrl, string pattern, int lengthLimit)
        {
            string postTitle = crawledUrl.Title?.Trim() ?? "No Title";
            // Strip trailing dots (and whitespace exposed by removing them), but never reduce the title below one character.
            while (postTitle.Length > 1 && postTitle[^1] == '.')
                postTitle = postTitle.Remove(postTitle.Length - 1).Trim();

            // The invariant culture keeps the date in the Gregorian calendar regardless of the
            // current culture; DateTime.ToString(string) would use the current culture's calendar.
            string publishedAt = crawledUrl.PublishedAt.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture);

            // Placeholders are matched case-insensitively without lower-casing the whole pattern,
            // so literal text supplied by the user keeps its original casing.
            string retString = pattern
                .Replace("%publishedat%", publishedAt, StringComparison.OrdinalIgnoreCase)
                .Replace("%posttitle%", postTitle, StringComparison.OrdinalIgnoreCase)
                .Replace("%postid%", crawledUrl.PostId, StringComparison.OrdinalIgnoreCase);

            // Reserve the last character for the '~' truncation marker.
            if (retString.Length > lengthLimit)
                retString = $"{retString.Substring(0, lengthLimit - 1).Trim()}~";

            return PathSanitizer.SanitizePath(retString);
        }
    }
}
"*" : "PatreonDownloader.App.*"); 40 | if(saveLogs) 41 | configuration.AddRule(nlogLogLevel, LogLevel.Fatal, fileTarget, nlogLogLevel != LogLevel.Info ? "*" : "PatreonDownloader.App.*"); 42 | //configuration.AddRule(debug ? LogLevel.Debug : LogLevel.Info, LogLevel.Fatal, consoleTarget, debug ? "*" : "PatreonDownloader.PuppeteerEngine.*"); 43 | //configuration.AddRuleForAllLevels(fileTarget); 44 | 45 | LogManager.Configuration = configuration; 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/PatreonCookieValidator.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Globalization; 4 | using System.Linq; 5 | using System.Net; 6 | using System.Text; 7 | using System.Threading.Tasks; 8 | using NLog; 9 | using UniversalDownloaderPlatform.Common.Exceptions; 10 | using UniversalDownloaderPlatform.Common.Interfaces; 11 | 12 | namespace PatreonDownloader.Implementation 13 | { 14 | internal class PatreonCookieValidator : ICookieValidator 15 | { 16 | private readonly IWebDownloader _webDownloader; 17 | private readonly Logger _logger = LogManager.GetCurrentClassLogger(); 18 | 19 | public PatreonCookieValidator(IWebDownloader webDownloader) 20 | { 21 | _webDownloader = webDownloader ?? throw new ArgumentNullException(nameof(webDownloader)); 22 | } 23 | 24 | public async Task ValidateCookies(CookieContainer cookieContainer) 25 | { 26 | if (cookieContainer == null) 27 | throw new ArgumentNullException(nameof(cookieContainer)); 28 | 29 | CookieCollection cookies = cookieContainer.GetCookies(new Uri("https://patreon.com")); 30 | cookies.Add(cookieContainer.GetCookies(new Uri("https://www.patreon.com"))); 31 | 32 | if (cookies["__cf_bm"] == null) 33 | _logger.Warn("\"__cf_bm\" cookie is not found. 
using System;
using PatreonDownloader.Implementation.Enums;
using UniversalDownloaderPlatform.DefaultImplementations.Models;

namespace PatreonDownloader.Implementation
{
    /// <summary>
    /// A crawled url together with the post metadata it was found in.
    /// </summary>
    public class PatreonCrawledUrl : CrawledUrl
    {
        public string PostId { get; set; }
        /// <summary>
        /// Internal patreon file id, only filled for media and attachments
        /// </summary>
        public string FileId { get; set; }
        public string Title { get; set; }
        public DateTime PublishedAt { get; set; }
        public PatreonCrawledUrlType UrlType { get; set; }

        /// <summary>
        /// Human-readable label for <see cref="UrlType"/>.
        /// Throws <see cref="ArgumentOutOfRangeException"/> for a value with no label.
        /// </summary>
        public string UrlTypeAsFriendlyString => UrlType switch
        {
            PatreonCrawledUrlType.Unknown => "Unknown",
            PatreonCrawledUrlType.PostFile => "File",
            PatreonCrawledUrlType.PostAttachment => "Attachment",
            PatreonCrawledUrlType.PostMedia => "Media",
            PatreonCrawledUrlType.ExternalUrl => "External Url",
            PatreonCrawledUrlType.CoverFile => "Cover",
            PatreonCrawledUrlType.AvatarFile => "Avatar",
            _ => throw new ArgumentOutOfRangeException()
        };

        /// <summary>
        /// Creates a new <see cref="PatreonCrawledUrl"/> carrying the same post id, file id,
        /// url, filename, url type, title and publication date as this instance.
        /// </summary>
        public object Clone()
        {
            PatreonCrawledUrl copy = new PatreonCrawledUrl();
            copy.PostId = PostId;
            copy.FileId = FileId;
            copy.Url = Url;
            copy.Filename = Filename;
            copy.UrlType = UrlType;
            copy.Title = Title;
            copy.PublishedAt = PublishedAt;
            return copy;
        }
    }
}
9 | 10 | Example usage: 11 | * Remote side: 12 | ```chrome.exe --headless --disable-gpu --remote-debugging-port=9222 --user-data-dir=C:\chromedata --user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36"``` 13 | * PatreonDownloader side: 14 | ```.\PatreonDownloader.App.exe --url https://www.patreon.com/mycreator/posts --remote-browser-address ws://127.0.0.1:9222``` 15 | 16 | Another example posted in the [issue #16](https://github.com/AlexCSDev/PatreonDownloader/issues/16#issuecomment-742842926 "issue #16"): 17 | - SSH to your host, forwarding port `9222`: `ssh -L 9222:127.0.0.1:9222 user@your-host` 18 | - Start Chrome with: 19 | ``` 20 | google-chrome-stable \ 21 | --headless \ 22 | --disable-gpu \ 23 | --remote-debugging-port=9222 \ 24 | --user-data-dir=(pwd)/chromedata \ 25 | --user-agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36' 26 | ``` 27 | - Open the Chrome remote debugger by opening Chrome on your local machine and navigating to http://127.0.0.1:9222 28 | - Click the `about:blank` link you see 29 | - You'll be shown a page that looks like Chrome's debug tools, but with an address bar at the top and a large display of the browser's screen. You can interact with the address bar, click things on the screen and type things with your keyboard. 
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using PatreonDownloader.Implementation;
using PatreonDownloader.Implementation.Enums;
using Xunit;

namespace PatreonDownloader.Tests
{
    /// <summary>
    /// Tests for <see cref="PostSubdirectoryHelper.CreateNameFromPattern"/>.
    /// </summary>
    public class PostSubdirectoryHelperTests
    {
        /// <summary>
        /// Builds the crawled url shared by all tests; only the title varies.
        /// The date is constructed explicitly so the tests do not depend on the current culture's date parsing rules.
        /// </summary>
        private static PatreonCrawledUrl CreateCrawledUrl(string title)
        {
            return new PatreonCrawledUrl
            {
                PostId = "123456",
                Title = title,
                PublishedAt = new DateTime(2020, 7, 7, 20, 0, 15),
                Url = "http://google.com",
                Filename = "test.png",
                UrlType = PatreonCrawledUrlType.PostMedia
            };
        }

        [Fact]
        public void CreateNameFromPattern_CrawledUrlIsProperlyFilled_ProperStringIsReturned()
        {
            PatreonCrawledUrl crawledUrl = CreateCrawledUrl("Test Post");

            Assert.Equal("[123456] 2020-07-07 Test Post", PostSubdirectoryHelper.CreateNameFromPattern(crawledUrl, "[%PostId%] %PublishedAt% %PostTitle%", 100));
        }

        [Fact]
        public void CreateNameFromPattern_PatternIsInWrongCase_ProperStringIsReturned()
        {
            PatreonCrawledUrl crawledUrl = CreateCrawledUrl("Test Post");

            Assert.Equal("[123456] 2020-07-07 Test Post", PostSubdirectoryHelper.CreateNameFromPattern(crawledUrl, "[%postId%] %PubliSHedAt% %Posttitle%", 100));
        }

        [Fact]
        public void CreateNameFromPattern_CrawledUrlTitleIsNull_TitleIsReplacedWithNoTitle()
        {
            PatreonCrawledUrl crawledUrl = CreateCrawledUrl(null);

            Assert.Equal("[123456] 2020-07-07 No Title", PostSubdirectoryHelper.CreateNameFromPattern(crawledUrl, "[%PostId%] %PublishedAt% %PostTitle%", 100));
        }
    }
}
To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/J3J6K3Q7G) 2 | 3 | # PatreonDownloader 4 | ⚠ Current state of the project: critical fixes only ⚠ 5 | 6 | Due to various circumstances I do not have ability to spend time on this project at present time. 
Therefore only critical fixes will be implemented. PRs and issues will be reviewed when the time allows it. 7 | 8 | This application is designed for downloading content posted by creators on patreon.com. 9 | 10 | IMPORTANT: You need a valid patreon account to download both free and paid content. Paid content will only be downloaded if you have an active subscription to creator's page. 11 | 12 | ## Usage 13 | #### Download all available files from creator 14 | PatreonDownloader.App.exe --url #page url#. Page url should follow one of the following patterns: 15 | * https://www.patreon.com/m/#numbers#/posts 16 | * https://www.patreon.com/user?u=#numbers# 17 | * https://www.patreon.com/user/posts?u=#numbers# 18 | * https://www.patreon.com/#creator_name#/posts 19 | #### Download all available files from creator into custom directory and save all possible data (post contents, embed metadata, cover and avatar, json responses) 20 | PatreonDownloader.App.exe --url #page url# --download-directory c:\downloads --descriptions --embeds --campaign-images --json 21 | #### Show available commands and their descriptions 22 | PatreonDownloader.App.exe --help 23 | 24 | ## System requirements 25 | Due to Cloudflare protection triggering on all connections with TLS version lower than 1.3 the application will only work on the following systems: 26 | * Windows 10 1903 and newer 27 | * Linux and other systems with OpenSSL 1.1.1 and newer 28 | 29 | ## Build instructions 30 | See docs\BUILDING.md 31 | 32 | ## Supported features 33 | * Tested under Windows and Linux. Should work on any platform supported by .NET Core and Chromium browser. 
34 | * Downloading files from posts 35 | * Downloading files from attachments 36 | * Saving html contents of posts 37 | * Saving metadata of embedded content 38 | * Saving api responses (mostly for troubleshooting purposes) 39 | * External links extraction from post 40 | * C# plugin support (see below) 41 | * Limited/dumb direct link support (PatreonDownloader will attempt to download any file with valid extension if no suitable plugin is installed) 42 | * Dropbox support 43 | * Blacklist (configured in settings.json) 44 | * Plugins (via C#) 45 | * Custom downloaders for adding download support for websites which need custom download logic 46 | * PatreonDownloader comes with the following plugins by default: Google Drive, Mega.nz 47 | 48 | ## Needs further testing 49 | * Gallery posts 50 | 51 | ## Known not implemented or not tested features 52 | * Audio files 53 | * Vimeo embedded videos 54 | * YouTube external links 55 | * imgur external links 56 | 57 | ## License 58 | All files in this repository are licensed under the license listed in LICENSE.md file unless stated otherwise. 
using System;
using System.Collections.Generic;
using System.Text;
using UniversalDownloaderPlatform.Common.Enums;
using UniversalDownloaderPlatform.Common.Helpers;
using UniversalDownloaderPlatform.Common.Interfaces.Models;
using UniversalDownloaderPlatform.DefaultImplementations.Models;
using UniversalDownloaderPlatform.PuppeteerEngine.Interfaces;

namespace PatreonDownloader.Implementation.Models
{
    /// <summary>
    /// Patreon-specific settings layered on top of the platform settings and the puppeteer settings.
    /// Every init-only property carries its default as an initializer.
    /// </summary>
    public record PatreonDownloaderSettings : UniversalDownloaderPlatformSettings, IPuppeteerSettings
    {
        // Content-saving switches, all enabled unless overridden.
        public bool SaveDescriptions { get; init; } = true;

        public bool SaveEmbeds { get; init; } = true;

        public bool SaveJson { get; init; } = true;

        public bool SaveAvatarAndCover { get; init; } = true;

        /// <summary>
        /// Create a new directory for every post and store files of said post in that directory
        /// </summary>
        public bool IsUseSubDirectories { get; init; } = false;

        /// <summary>
        /// Pattern used to generate directory name if IsUseSubDirectories is enabled
        /// </summary>
        public string SubDirectoryPattern { get; init; } = "[%PostId%] %PublishedAt% %PostTitle%";

        /// <summary>
        /// Subdirectory names will be truncated to this length
        /// </summary>
        public int MaxSubdirectoryNameLength { get; init; } = 100;

        /// <summary>
        /// Filenames will be truncated to this length
        /// </summary>
        public int MaxFilenameLength { get; init; } = 100;

        /// <summary>
        /// Fallback to using sha256 hash and Content-Type for filenames if Content-Disposition fails
        /// </summary>
        public bool FallbackToContentTypeFilenames { get; init; } = false;

        /// <summary>
        /// Use legacy file naming pattern (without addition of media/attachment ids to filenames). NOT COMPATIBLE WITH FileExistsAction BackupIfDifferent/ReplaceIfDifferent
        /// </summary>
        public bool IsUseLegacyFilenaming { get; init; } = false;

        // Fixed Patreon endpoints; read-only, not configurable.
        public string LoginPageAddress => "https://www.patreon.com/login";

        public string LoginCheckAddress => "https://www.patreon.com/api/badges?json-api-version=1.0&json-api-use-default-includes=false&include=[]";

        public string CaptchaCookieRetrievalAddress => "https://www.patreon.com/home";

        public Uri RemoteBrowserAddress { get; init; }

        public bool IsHeadlessBrowser { get; init; } = true;
    }
}
Execute **dotnet run** 13 | 14 | ## Building framework-dependent executable via Visual Studio on Windows 15 | 1. Open **PatreonDownloader.sln** solution 16 | 2. Select the desired build configuration in the build toolbar and build the solution by pressing Build -> Build Solution 17 | 3. Refer to steps 3-4 of **Building framework-dependent executable via command line on all platforms** for further instructions. 18 | 19 | The resulting executable will require .NET Core Runtime to be installed on the computer in order to run. 20 | 21 | ## Building framework-dependent executable via command line on all platforms 22 | 1. Launch command line in **PatreonDownloader.App** folder 23 | 2. Execute **dotnet build -c release** (replace **-c release** with **-c debug** to produce a debug build) 24 | 3. Compiled application will be placed into **PatreonDownloader.App\bin\\(Release/Debug)\net9.0** 25 | 4. Navigate to **PatreonDownloader.App\bin\\(Release/Debug)\net9.0** folder and run **dotnet PatreonDownloader.App.dll** 26 | 27 | [Refer to official documentation to learn more about "dotnet build" command](https://docs.microsoft.com/en-us/dotnet/core/tools/dotnet-build?tabs=netcore31) 28 | 29 | The resulting executable will require .NET Core Runtime to be installed on the computer in order to run. 30 | 31 | ## Building standalone executable via Visual Studio on Windows 32 | 1. Open **PatreonDownloader.sln** solution 33 | 2. Right click on the **PatreonDownloader.App** project and click **Publish** 34 | 3. Select the desired publish profile and click **Publish**. 35 | 36 | Application will be compiled and published in **PatreonDownloader.App\bin\publish\net9.0-(win/linux)-(x86/x64)-(release/debug)**. 37 | 38 | The application will be published as a self-contained application and will not need .NET Runtime to function. To run the application use **PatreonDownloader.App(.exe)** executable. 39 | 40 | ## Building standalone executable via command line on all platforms 41 | 1.
Launch command line in **PatreonDownloader.App** folder 42 | 2. Run the following command to build self-contained release build targeting .NET 9.0: 43 | 44 | Windows x64: **dotnet publish -c Release -r win-x64 --self-contained -f net9.0 -o bin\publish\net9.0-win-x64-release** 45 | 46 | Linux x64: **dotnet publish -c Release -r linux-x64 --self-contained -f net9.0 -o bin/publish/net9.0-linux-x64-release** 47 | 48 | [Refer to official documentation to learn more about "dotnet publish" command](https://docs.microsoft.com/en-us/dotnet/core/tools/dotnet-publish?tabs=netcore31) 49 | 50 | Application will be compiled and published in folder specified by the **-o** parameter. 51 | 52 | The application will be published as a self-contained application and will not need .NET Core Runtime to function. To run the application use **PatreonDownloader.App(.exe)** executable. 53 | 54 | ## Putting additional files into PatreonDownloader folder 55 | PatreonDownloader uses additional plugins from Universal Downloader Platform, which need to be placed into the appropriate folders in order to work correctly. 56 | 57 | Google drive: 58 | After building plugin binaries go to the output folder and copy **Google.Apis.Auth.dll, Google.Apis.Core.dll, Google.Apis.dll, Google.Apis.Drive.v3.dll and UniversalDownloaderPlatform.GoogleDriveDownloader.dll** files into the **plugins** folder inside of PatreonDownloader folder. 59 | 60 | Mega.nz: 61 | After building plugin binaries go to the output folder and copy **MegaApiClient.dll and UniversalDownloaderPlatform.MegaDownloader.dll** files into the **plugins** folder inside of PatreonDownloader folder.
using System;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using HeyRed.Mime;
using NLog;
using PatreonDownloader.Implementation.Helpers;
using PatreonDownloader.Implementation.Interfaces;
using PatreonDownloader.Implementation.Models;
using UniversalDownloaderPlatform.Common.Interfaces.Models;

namespace PatreonDownloader.Implementation
{
    /// <summary>
    /// Determines the file name of a remote resource. Tries, in order: the Content-Disposition
    /// header, the last path segment of the url, and (when enabled in settings) a generated
    /// name built from the url hash and the Content-Type header.
    /// </summary>
    internal class PatreonRemoteFilenameRetriever : IRemoteFilenameRetriever
    {
        private readonly Regex _urlRegex;
        private readonly HttpClient _httpClient;

        private readonly Logger _logger = LogManager.GetCurrentClassLogger();
        private bool _isUseMediaType;

        public PatreonRemoteFilenameRetriever()
        {
            // Matches "name.ext" (3-4 character extension) directly followed by the query string or the end of the url
            _urlRegex = new Regex(@"[^\/\&\?]+\.\w{3,4}(?=([\?&].*$|$))");

            _httpClient = new HttpClient();
        }

        /// <summary>
        /// Reads the Content-Type fallback switch from the supplied settings.
        /// </summary>
        /// <param name="settings">Settings instance; must be a <see cref="PatreonDownloaderSettings"/></param>
        public async Task BeforeStart(IUniversalDownloaderPlatformSettings settings)
        {
            PatreonDownloaderSettings patreonDownloaderSettings = (PatreonDownloaderSettings)settings;
            _isUseMediaType = patreonDownloaderSettings.FallbackToContentTypeFilenames;
        }

        /// <summary>
        /// Retrieve remote file name
        /// </summary>
        /// <param name="url">File name url</param>
        /// <returns>File name if it could be determined, null if url is null/empty or no name could be derived</returns>
        public async Task<string> RetrieveRemoteFileName(string url)
        {
            if (string.IsNullOrEmpty(url))
                return null;

            string mediaType = null;
            string filename = null;
            try
            {
                // Only the headers are needed. The response must be disposed, otherwise the
                // unread body keeps the underlying connection occupied.
                using HttpResponseMessage response = await _httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead);

                // Strip the quotes before testing so a header value consisting only of quotes
                // does not block the Content-Type fallback.
                string dispositionName = response.Content.Headers.ContentDisposition?.FileName?.Replace("\"", "");
                string contentType = response.Content.Headers.ContentType?.MediaType;

                if (!string.IsNullOrWhiteSpace(dispositionName))
                {
                    filename = dispositionName;
                    _logger.Debug($"Content-Disposition returned: {filename}");
                }
                else if (_isUseMediaType && !string.IsNullOrWhiteSpace(contentType))
                {
                    mediaType = contentType;
                }
            }
            catch (HttpRequestException ex)
            {
                _logger.Error($"HttpRequestException while trying to retrieve remote file name: {ex}");
            }
            catch (TaskCanceledException ex)
            {
                _logger.Error($"TaskCanceledException while trying to retrieve remote file name: {ex}");
            }

            if (string.IsNullOrWhiteSpace(filename))
            {
                Match match = _urlRegex.Match(url);
                if (match.Success)
                {
                    filename = match.Groups[0].Value;

                    // Patreon truncates extensions so we need to fix this
                    if (url.Contains("patreonusercontent.com/", StringComparison.Ordinal)
                        && filename.EndsWith(".jpe", StringComparison.Ordinal))
                    {
                        filename += "g";
                    }

                    _logger.Debug($"Content-Disposition failed, fallback to url extraction, extracted name: {filename}");
                }
            }

            // Last resort: generated name, only reachable when the fallback is enabled in settings
            if (!string.IsNullOrWhiteSpace(mediaType) && string.IsNullOrWhiteSpace(filename))
            {
                filename =
                    $"gen_{HashHelper.ComputeSha256Hash(url)}.{MimeTypesMap.GetExtension(mediaType)}";

                _logger.Debug($"Content-Disposition and url extraction failed, fallback to Content-Type + hash based name: {filename}");
            }

            return filename;
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.AccessControl;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Newtonsoft.Json;
using NLog;
using UniversalDownloaderPlatform.Common.Exceptions;
using UniversalDownloaderPlatform.Common.Interfaces;
using UniversalDownloaderPlatform.Common.Interfaces.Models;

namespace PatreonDownloader.Implementation
{
    /// <summary>
    /// Resolves a creator page url into crawl target information (campaign id, name, avatar and cover urls).
    /// </summary>
    internal sealed class PatreonCrawlTargetInfoRetriever : ICrawlTargetInfoRetriever
    {
        // Finds the campaign api link ("self": "https://www.patreon.com/api/campaigns/<id>") in the page,
        // with or without backslash-escaped quotes. Built once instead of on every call.
        private static readonly Regex CampaignIdRegex = new Regex(
            "\\\\?\"self\\\\?\": ?\\\\?\"https:\\/\\/www\\.patreon\\.com\\/api\\/campaigns\\/(\\d+)\\\\?\"",
            RegexOptions.Compiled);

        private readonly IWebDownloader _webDownloader;
        private readonly Logger _logger = LogManager.GetCurrentClassLogger();

        public PatreonCrawlTargetInfoRetriever(IWebDownloader webDownloader)
        {
            _webDownloader = webDownloader ?? throw new ArgumentNullException(nameof(webDownloader));
        }

        /// <summary>
        /// Downloads the creator page, extracts the campaign id and loads the campaign details.
        /// </summary>
        /// <param name="url">Url of the creator's page</param>
        /// <returns>Crawl target info of the campaign</returns>
        /// <exception cref="ArgumentException">Url is null or empty</exception>
        /// <exception cref="UniversalDownloaderException">Page or campaign data could not be retrieved or parsed</exception>
        public async Task<ICrawlTargetInfo> RetrieveCrawlTargetInfo(string url)
        {
            long campaignId = await GetCampaignId(url);

            return await GetCrawlTargetInfo(campaignId);
        }

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and extracts the numeric campaign id from it.
        /// </summary>
        private async Task<long> GetCampaignId(string url)
        {
            if (string.IsNullOrEmpty(url))
                throw new ArgumentException("Argument cannot be null or empty", nameof(url));

            string pageHtml;
            try
            {
                pageHtml = await _webDownloader.DownloadString(url);
            }
            catch (DownloadException downloadEx)
            {
                if (downloadEx.StatusCode == System.Net.HttpStatusCode.Forbidden)
                {
                    _logger.Fatal("Cannot retrieve creator campaign id. This usually means your network is being blocked by Patreon. If you are using VPN or proxy try running application without it. If you are NOT using VPN or proxy - try using them. This is usually NOT an issue with PatreonDownloader even if you can access Patreon via your web browser.");
                    throw new UniversalDownloaderException($"Unable to retrieve campaign id: 403 Forbidden", downloadEx);
                }

                throw new UniversalDownloaderException($"Unable to retrieve campaign id: {downloadEx.Message}", downloadEx);
            }
            catch (Exception ex)
            {
                throw new UniversalDownloaderException($"Unable to retrieve campaign id: {ex.Message}", ex);
            }

            // Parsing happens outside of the try block so the exceptions thrown below are not
            // caught and wrapped a second time (which duplicated the message prefix).
            Match match = CampaignIdRegex.Match(pageHtml ?? string.Empty);
            if (!match.Success)
                throw new UniversalDownloaderException($"Unable to retrieve campaign id: regex failed. Report this error to developer.");

            if (!long.TryParse(match.Groups[1].Value, out long campaignId))
                throw new UniversalDownloaderException($"Unable to retrieve campaign id: \"{match.Groups[1].Value}\" is not a valid campaign id");

            return campaignId;
        }

        /// <summary>
        /// Loads the campaign from the Patreon api and maps it to <see cref="PatreonCrawlTargetInfo"/>.
        /// Any failure (including an invalid id) is reported as <see cref="UniversalDownloaderException"/>.
        /// </summary>
        private async Task<PatreonCrawlTargetInfo> GetCrawlTargetInfo(long campaignId)
        {
            try
            {
                if (campaignId < 1)
                    throw new ArgumentOutOfRangeException(nameof(campaignId), "Campaign id cannot be less than 1");

                string json = await _webDownloader.DownloadString(
                    $"https://www.patreon.com/api/campaigns/{campaignId}?include=access_rules.tier.null&fields[access_rule]=access_rule_type%2Camount_cents%2Cpost_count&fields[reward]=title%2Cid%2Camount_cents&json-api-version=1.0");

                Models.JSONObjects.Campaign.Root root = JsonConvert.DeserializeObject<Models.JSONObjects.Campaign.Root>(json);

                return new PatreonCrawlTargetInfo
                {
                    AvatarUrl = root.Data.Attributes.AvatarUrl,
                    CoverUrl = root.Data.Attributes.CoverUrl,
                    Name = root.Data.Attributes.Name,
                    Id = campaignId
                };
            }
            catch (Exception ex)
            {
                throw new UniversalDownloaderException($"Unable to retrieve crawl target info: {ex.Message}", ex);
            }
        }
    }
}
"{DA8399F8-9CC8-46DB-877E-7B85F3AFCF98}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "PatreonDownloader.Implementation", "PatreonDownloader.Implementation\PatreonDownloader.Implementation.csproj", "{3C84157E-4044-4E4E-816A-0CA0EA438D16}" 9 | EndProject 10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "submodules", "submodules", "{B84B7F77-FEAC-4404-B299-70798AF91610}" 11 | EndProject 12 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UniversalDownloaderPlatform.Common", "submodules\UniversalDownloaderPlatform\UniversalDownloaderPlatform.Common\UniversalDownloaderPlatform.Common.csproj", "{2A752CD2-66E9-455D-8160-EB7DD5AA43A0}" 13 | EndProject 14 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UniversalDownloaderPlatform.DefaultImplementations", "submodules\UniversalDownloaderPlatform\UniversalDownloaderPlatform.DefaultImplementations\UniversalDownloaderPlatform.DefaultImplementations.csproj", "{4D6CFA58-7C03-4451-AE58-7F1C148787A3}" 15 | EndProject 16 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UniversalDownloaderPlatform.Engine", "submodules\UniversalDownloaderPlatform\UniversalDownloaderPlatform.Engine\UniversalDownloaderPlatform.Engine.csproj", "{A887D576-2DDB-4DDD-93FC-3C32544C0050}" 17 | EndProject 18 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{C675CCB3-6202-4A24-BDB4-635F5B3783BE}" 19 | EndProject 20 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "PatreonDownloader.Tests", "PatreonDownloader.Tests\PatreonDownloader.Tests.csproj", "{32589019-22B4-49E3-9710-86407EA1F911}" 21 | EndProject 22 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UniversalDownloaderPlatform.PuppeteerEngine", "submodules\UniversalDownloaderPlatform\UniversalDownloaderPlatform.PuppeteerEngine\UniversalDownloaderPlatform.PuppeteerEngine.csproj", "{2EF4E7BA-9E1C-43D7-B483-AD5D90033631}" 23 | EndProject 24 | Global 25 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 26 | 
Debug|Any CPU = Debug|Any CPU 27 | Release|Any CPU = Release|Any CPU 28 | EndGlobalSection 29 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 30 | {DA8399F8-9CC8-46DB-877E-7B85F3AFCF98}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 31 | {DA8399F8-9CC8-46DB-877E-7B85F3AFCF98}.Debug|Any CPU.Build.0 = Debug|Any CPU 32 | {DA8399F8-9CC8-46DB-877E-7B85F3AFCF98}.Release|Any CPU.ActiveCfg = Release|Any CPU 33 | {DA8399F8-9CC8-46DB-877E-7B85F3AFCF98}.Release|Any CPU.Build.0 = Release|Any CPU 34 | {3C84157E-4044-4E4E-816A-0CA0EA438D16}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 35 | {3C84157E-4044-4E4E-816A-0CA0EA438D16}.Debug|Any CPU.Build.0 = Debug|Any CPU 36 | {3C84157E-4044-4E4E-816A-0CA0EA438D16}.Release|Any CPU.ActiveCfg = Release|Any CPU 37 | {3C84157E-4044-4E4E-816A-0CA0EA438D16}.Release|Any CPU.Build.0 = Release|Any CPU 38 | {2A752CD2-66E9-455D-8160-EB7DD5AA43A0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 39 | {2A752CD2-66E9-455D-8160-EB7DD5AA43A0}.Debug|Any CPU.Build.0 = Debug|Any CPU 40 | {2A752CD2-66E9-455D-8160-EB7DD5AA43A0}.Release|Any CPU.ActiveCfg = Release|Any CPU 41 | {2A752CD2-66E9-455D-8160-EB7DD5AA43A0}.Release|Any CPU.Build.0 = Release|Any CPU 42 | {4D6CFA58-7C03-4451-AE58-7F1C148787A3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 43 | {4D6CFA58-7C03-4451-AE58-7F1C148787A3}.Debug|Any CPU.Build.0 = Debug|Any CPU 44 | {4D6CFA58-7C03-4451-AE58-7F1C148787A3}.Release|Any CPU.ActiveCfg = Release|Any CPU 45 | {4D6CFA58-7C03-4451-AE58-7F1C148787A3}.Release|Any CPU.Build.0 = Release|Any CPU 46 | {A887D576-2DDB-4DDD-93FC-3C32544C0050}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 47 | {A887D576-2DDB-4DDD-93FC-3C32544C0050}.Debug|Any CPU.Build.0 = Debug|Any CPU 48 | {A887D576-2DDB-4DDD-93FC-3C32544C0050}.Release|Any CPU.ActiveCfg = Release|Any CPU 49 | {A887D576-2DDB-4DDD-93FC-3C32544C0050}.Release|Any CPU.Build.0 = Release|Any CPU 50 | {32589019-22B4-49E3-9710-86407EA1F911}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 51 | {32589019-22B4-49E3-9710-86407EA1F911}.Debug|Any 
CPU.Build.0 = Debug|Any CPU 52 | {32589019-22B4-49E3-9710-86407EA1F911}.Release|Any CPU.ActiveCfg = Release|Any CPU 53 | {32589019-22B4-49E3-9710-86407EA1F911}.Release|Any CPU.Build.0 = Release|Any CPU 54 | {2EF4E7BA-9E1C-43D7-B483-AD5D90033631}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 55 | {2EF4E7BA-9E1C-43D7-B483-AD5D90033631}.Debug|Any CPU.Build.0 = Debug|Any CPU 56 | {2EF4E7BA-9E1C-43D7-B483-AD5D90033631}.Release|Any CPU.ActiveCfg = Release|Any CPU 57 | {2EF4E7BA-9E1C-43D7-B483-AD5D90033631}.Release|Any CPU.Build.0 = Release|Any CPU 58 | EndGlobalSection 59 | GlobalSection(SolutionProperties) = preSolution 60 | HideSolutionNode = FALSE 61 | EndGlobalSection 62 | GlobalSection(NestedProjects) = preSolution 63 | {2A752CD2-66E9-455D-8160-EB7DD5AA43A0} = {B84B7F77-FEAC-4404-B299-70798AF91610} 64 | {4D6CFA58-7C03-4451-AE58-7F1C148787A3} = {B84B7F77-FEAC-4404-B299-70798AF91610} 65 | {A887D576-2DDB-4DDD-93FC-3C32544C0050} = {B84B7F77-FEAC-4404-B299-70798AF91610} 66 | {32589019-22B4-49E3-9710-86407EA1F911} = {C675CCB3-6202-4A24-BDB4-635F5B3783BE} 67 | {2EF4E7BA-9E1C-43D7-B483-AD5D90033631} = {B84B7F77-FEAC-4404-B299-70798AF91610} 68 | EndGlobalSection 69 | GlobalSection(ExtensibilityGlobals) = postSolution 70 | SolutionGuid = {D69B5C66-A2F2-44AE-B347-C072E8538DF6} 71 | EndGlobalSection 72 | EndGlobal 73 | -------------------------------------------------------------------------------- /PatreonDownloader.App/Models/CommandLineOptions.cs: -------------------------------------------------------------------------------- 1 | using CommandLine; 2 | using PatreonDownloader.App.Enums; 3 | using UniversalDownloaderPlatform.Common.Enums; 4 | 5 | namespace PatreonDownloader.App.Models 6 | { 7 | class CommandLineOptions 8 | { 9 | [Option("url", Required = true, HelpText = "Url of the creator's page")] 10 | public string Url { get; set; } 11 | [Option("descriptions", Required = false, HelpText = "Save post descriptions", Default = false)] 12 | public bool SaveDescriptions { get; 
set; } 13 | [Option("embeds", Required = false, HelpText = "Save embedded content metadata", Default = false)] 14 | public bool SaveEmbeds { get; set; } 15 | [Option("json", Required = false, HelpText = "Save json data", Default = false)] 16 | public bool SaveJson { get; set; } 17 | 18 | [Option("campaign-images", Required = false, HelpText = "Download campaign's avatar and cover images", Default = false)] 19 | public bool SaveAvatarAndCover { get; set; } 20 | 21 | [Option("download-directory", Required = false, HelpText = "Directory to save all downloaded files in, default: #AppDirectory#/downloads/#CreatorName#.")] 22 | public string DownloadDirectory { get; set; } 23 | 24 | [Option("log-level", Required = false, HelpText = "Logging level. Possible options: Default, Debug, Trace. Affects both console and file logging.", Default = LogLevel.Default)] 25 | public LogLevel LogLevel { get; set; } 26 | 27 | [Option("log-save", Required = false, HelpText = "Create log files in the \"logs\" directory.", Default = false)] 28 | public bool SaveLogs { get; set; } 29 | 30 | [Option("file-exists-action", Required = false, HelpText = 31 | "What to do with files already existing on the disk.\r\nPossible options:\r\n" + 32 | "BackupIfDifferent: Check remote file size if enabled and available. If it's different, disabled or not available then download remote file and compare it with existing file, create a backup copy of old file if they are different.\r\n" + 33 | "ReplaceIfDifferent: Same as BackupIfDifferent, but the backup copy of the file will not be created.\r\n" + 34 | "AlwaysReplace: Always replace existing file. Warning: will result in increased bandwidth usage.\r\n" + 35 | "KeepExisting: Always keep existing file. 
The most bandwidth-friendly option.", 36 | Default = FileExistsAction.BackupIfDifferent)] 37 | public FileExistsAction FileExistsAction { get; set; } 38 | 39 | [Option("use-legacy-file-naming", Required = false, HelpText = "Use legacy filenaming pattern (used before version 21). Not compatible with --file-exists-action BackupIfDifferent, ReplaceIfDifferent. Warning: this is compatibility option and might be removed in the future, you should not use it unless you absolutely need it.", Default = false)] 40 | public bool IsUseLegacyFilenaming { get; set; } 41 | 42 | [Option("disable-remote-file-size-check", Required = false, 43 | HelpText = "Do not ask the server for the file size (if it's available) and do not use it in various pre-download checks if the file already exists on the disk. Warning: will result in increased bandwidth usage if used with --file-exists-action BackupIfDifferent, ReplaceIfDifferent, AlwaysReplace.", 44 | Default = false)] 45 | public bool IsDisableRemoteFileSizeCheck { get; set; } 46 | 47 | [Option("remote-browser-address", Required = false, HelpText = "Advanced users only. Address of the browser with remote debugging enabled. Refer to documentation for more details.")] 48 | public string RemoteBrowserAddress { get; set; } 49 | 50 | [Option("use-sub-directories", Required = false, HelpText = "Create a new directory inside of the download directory for every post instead of placing all files into a single directory.")] 51 | public bool UseSubDirectories { get; set; } 52 | 53 | [Option("sub-directory-pattern", Required = false, HelpText = "Pattern which will be used to create a name for the sub directories if --use-sub-directories is used. 
Supported parameters: %PostId%, %PublishedAt%, %PostTitle%.", Default = "[%PostId%] %PublishedAt% %PostTitle%")] 54 | public string SubDirectoryPattern { get; set; } 55 | 56 | [Option("max-sub-directory-name-length", Required = false, HelpText = "Limits the length of the name for the subdirectories created when --use-sub-directories is used.", Default = 100)] 57 | public int MaxSubdirectoryNameLength { get; set; } 58 | 59 | [Option("max-filename-length", Required = false, HelpText = "All names of downloaded files will be truncated so their length won't be more than specified value (excluding file extension)", Default = 100)] 60 | public int MaxFilenameLength { get; set; } 61 | 62 | [Option("filenames-fallback-to-content-type", Required = false, HelpText = "Fallback to using filename generated from url hash if the server returns file content type (extension) and all other methods have failed. Use with caution, this might result in unwanted files being created or the same files being downloaded on every run under different names.", Default = false)] 63 | public bool FilenamesFallbackToContentType { get; set; } 64 | 65 | [Option("proxy-server-address", Required = false, HelpText = "The address of proxy server to use in the following format: [://][:]. 
Supported protocols: http(s), socks4, socks4a, socks5.")] 66 | public string ProxyServerAddress { get; set; } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/PatreonDefaultPlugin.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Runtime; 5 | using System.Text.RegularExpressions; 6 | using System.Threading.Tasks; 7 | using HtmlAgilityPack; 8 | using NLog; 9 | using PatreonDownloader.Implementation; 10 | using PatreonDownloader.Implementation.Enums; 11 | using PatreonDownloader.Implementation.Interfaces; 12 | using UniversalDownloaderPlatform.Common.Exceptions; 13 | using UniversalDownloaderPlatform.Common.Interfaces; 14 | using UniversalDownloaderPlatform.Common.Interfaces.Models; 15 | using UniversalDownloaderPlatform.Common.Interfaces.Plugins; 16 | using UniversalDownloaderPlatform.DefaultImplementations.Models; 17 | 18 | namespace PatreonDownloader.Engine 19 | { 20 | /// <summary> 21 | /// This is the default download/parsing plugin for all files 22 | /// This plugin is used when no other plugins are available for url 23 | /// </summary> 24 | internal sealed class PatreonDefaultPlugin : IPlugin 25 | { 26 | private IWebDownloader _webDownloader; 27 | 28 | private readonly Logger _logger = LogManager.GetCurrentClassLogger(); 29 | 30 | public string Name => "Default plugin"; 31 | 32 | public string Author => "Aleksey Tsutsey"; 33 | public string ContactInformation => "https://github.com/Megalan/PatreonDownloader"; 34 | 35 | private IUniversalDownloaderPlatformSettings _settings; 36 | 37 | public PatreonDefaultPlugin(IWebDownloader webDownloader) 38 | { 39 | _webDownloader = webDownloader ?? 
throw new ArgumentNullException(nameof(webDownloader)); 40 | } 41 | 42 | public void OnLoad(IDependencyResolver dependencyResolver) 43 | { 44 | //do nothing 45 | } 46 | 47 | public async Task IsSupportedUrl(string url) 48 | { 49 | if (string.IsNullOrEmpty(url)) 50 | return false; 51 | 52 | return await Task.FromResult(true); 53 | } 54 | 55 | public async Task Download(ICrawledUrl crawledUrl) 56 | { 57 | if(crawledUrl == null) 58 | throw new ArgumentNullException(nameof(crawledUrl)); 59 | 60 | if (string.IsNullOrWhiteSpace(crawledUrl.DownloadPath)) 61 | throw new DownloadException($"Download path is not filled for {crawledUrl.Url}"); 62 | 63 | await _webDownloader.DownloadFile(crawledUrl.Url, Path.Combine(_settings.DownloadDirectory, crawledUrl.DownloadPath), null); //referer is set in PatreonWebDownloader 64 | } 65 | 66 | public Task BeforeStart(IUniversalDownloaderPlatformSettings settings) 67 | { 68 | _settings = settings; 69 | return Task.CompletedTask; 70 | } 71 | 72 | public async Task> ExtractSupportedUrls(string htmlContents) 73 | { 74 | List retList = new List(); 75 | HtmlDocument doc = new HtmlDocument(); 76 | doc.LoadHtml(htmlContents); 77 | HtmlNodeCollection imgNodeCollection = doc.DocumentNode.SelectNodes("//img"); 78 | if (imgNodeCollection != null) 79 | { 80 | foreach (var imgNode in imgNodeCollection) 81 | { 82 | if (imgNode.Attributes.Count == 0 || !imgNode.Attributes.Contains("src")) 83 | continue; 84 | 85 | string url = imgNode.Attributes["src"].Value; 86 | 87 | url = url.Replace("&amp;", "&"); //sometimes there are broken links with &amp; instead of & 88 | 89 | if (IsAllowedUrl(url)) 90 | { 91 | retList.Add(url); 92 | 93 | _logger.Debug($"Parsed by default plugin (image): {url}"); 94 | } 95 | } 96 | } 97 | 98 | HtmlNodeCollection linkNodeCollection = doc.DocumentNode.SelectNodes("//a"); 99 | if (linkNodeCollection != null) 100 | { 101 | foreach (var linkNode in linkNodeCollection) 102 | { 103 | if (linkNode.Attributes.Count == 0 || 
!linkNode.Attributes.Contains("href")) 104 | continue; 105 | 106 | var url = linkNode.Attributes["href"].Value; 107 | 108 | url = url.Replace("&amp;", "&"); //sometimes there are broken links with &amp; instead of & 109 | 110 | if (IsAllowedUrl(url)) 111 | { 112 | retList.Add(url); 113 | _logger.Debug($"Parsed by default plugin (direct): {url}"); 114 | } 115 | } 116 | } 117 | 118 | return retList; 119 | } 120 | 121 | private bool IsAllowedUrl(string url) 122 | { 123 | if (url.StartsWith("https://mega.nz/")) 124 | { 125 | //This should never be called if mega plugin is installed 126 | _logger.Debug($"Mega plugin not installed, file will not be downloaded: {url}"); 127 | return false; 128 | } 129 | 130 | return true; 131 | } 132 | 133 | public Task ProcessCrawledUrl(ICrawledUrl crawledUrl) 134 | { 135 | if (crawledUrl.Url.IndexOf("dropbox.com/", StringComparison.Ordinal) != -1) 136 | { 137 | if (!crawledUrl.Url.EndsWith("?dl=1")) 138 | { 139 | if (crawledUrl.Url.EndsWith("?dl=0")) 140 | crawledUrl.Url = crawledUrl.Url.Replace("?dl=0", "?dl=1"); 141 | else 142 | crawledUrl.Url = $"{crawledUrl.Url}?dl=1"; 143 | } 144 | 145 | _logger.Trace($"Dropbox url found: {crawledUrl.Url}"); 146 | } 147 | 148 | return Task.FromResult(false); //we still want full processing for all crawled urls passed here 149 | } 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | launchSettings.json 7 | 8 | # User-specific files 9 | *.rsuser 10 | *.suo 11 | *.user 12 | *.userosscache 13 | *.sln.docstates 14 | 15 | # User-specific files (MonoDevelop/Xamarin Studio) 16 | *.userprefs 17 | 18 | # Build results 19 | [Dd]ebug/ 20 | [Dd]ebugPublic/ 21 | [Rr]elease/ 22 | [Rr]eleases/ 23 | x64/ 24 | x86/ 25 | [Aa][Rr][Mm]/ 26 | [Aa][Rr][Mm]64/ 27 | bld/ 28 | [Bb]in/ 29 | [Oo]bj/ 30 | [Ll]og/ 31 | 32 | # Visual Studio 2015/2017 cache/options directory 33 | .vs/ 34 | # Uncomment if you have tasks that create the project's static files in wwwroot 35 | #wwwroot/ 36 | 37 | # Visual Studio 2017 auto generated files 38 | Generated\ Files/ 39 | 40 | # MSTest test Results 41 | [Tt]est[Rr]esult*/ 42 | [Bb]uild[Ll]og.* 43 | 44 | # NUNIT 45 | *.VisualState.xml 46 | TestResult.xml 47 | 48 | # Build Results of an ATL Project 49 | [Dd]ebugPS/ 50 | [Rr]eleasePS/ 51 | dlldata.c 52 | 53 | # Benchmark Results 54 | BenchmarkDotNet.Artifacts/ 55 | 56 | # .NET Core 57 | project.lock.json 58 | project.fragment.lock.json 59 | artifacts/ 60 | 61 | # StyleCop 62 | StyleCopReport.xml 63 | 64 | # Files built by Visual Studio 65 | *_i.c 66 | *_p.c 67 | *_h.h 68 | *.ilk 69 | *.meta 70 | *.obj 71 | *.iobj 72 | *.pch 73 | *.pdb 74 | *.ipdb 75 | *.pgc 76 | *.pgd 77 | *.rsp 78 | *.sbr 79 | *.tlb 80 | *.tli 81 | *.tlh 82 | *.tmp 83 | *.tmp_proj 84 | *_wpftmp.csproj 85 | *.log 86 | *.vspscc 87 | *.vssscc 88 | .builds 89 | *.pidb 90 | *.svclog 91 | *.scc 92 | 93 | # Chutzpah Test files 94 | _Chutzpah* 95 | 96 | # Visual C++ cache files 97 | ipch/ 98 | *.aps 99 | *.ncb 100 | *.opendb 101 | *.opensdf 102 | *.sdf 103 | *.cachefile 104 | *.VC.db 105 | *.VC.VC.opendb 106 | 107 | # Visual Studio profiler 108 | *.psess 109 | *.vsp 110 | *.vspx 111 | *.sap 112 | 113 | # Visual Studio Trace Files 114 | *.e2e 115 | 116 | # TFS 2012 Local Workspace 117 | $tf/ 118 | 119 | # Guidance 
Automation Toolkit 120 | *.gpState 121 | 122 | # ReSharper is a .NET coding add-in 123 | _ReSharper*/ 124 | *.[Rr]e[Ss]harper 125 | *.DotSettings.user 126 | 127 | # JustCode is a .NET coding add-in 128 | .JustCode 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | #*.pubxml 178 | #*.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | #PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # The packages folder can be ignored because of Package Restore 188 | **/[Pp]ackages/* 189 | # except build/, which is used as an MSBuild target. 
190 | !**/[Pp]ackages/build/ 191 | # Uncomment if necessary however generally it will be regenerated when needed 192 | #!**/[Pp]ackages/repositories.config 193 | # NuGet v3's project.json files produces more ignorable files 194 | *.nuget.props 195 | *.nuget.targets 196 | 197 | # Microsoft Azure Build Output 198 | csx/ 199 | *.build.csdef 200 | 201 | # Microsoft Azure Emulator 202 | ecf/ 203 | rcf/ 204 | 205 | # Windows Store app package directories and files 206 | AppPackages/ 207 | BundleArtifacts/ 208 | Package.StoreAssociation.xml 209 | _pkginfo.txt 210 | *.appx 211 | 212 | # Visual Studio cache files 213 | # files ending in .cache can be ignored 214 | *.[Cc]ache 215 | # but keep track of directories ending in .cache 216 | !?*.[Cc]ache/ 217 | 218 | # Others 219 | ClientBin/ 220 | ~$* 221 | *~ 222 | *.dbmdl 223 | *.dbproj.schemaview 224 | *.jfm 225 | *.pfx 226 | *.publishsettings 227 | orleans.codegen.cs 228 | 229 | # Including strong name files can present a security risk 230 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 231 | #*.snk 232 | 233 | # Since there are multiple workflows, uncomment next line to ignore bower_components 234 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 235 | #bower_components/ 236 | 237 | # RIA/Silverlight projects 238 | Generated_Code/ 239 | 240 | # Backup & report files from converting an old project file 241 | # to a newer Visual Studio version. 
Backup files are not needed, 242 | # because we have git ;-) 243 | _UpgradeReport_Files/ 244 | Backup*/ 245 | UpgradeLog*.XML 246 | UpgradeLog*.htm 247 | ServiceFabricBackup/ 248 | *.rptproj.bak 249 | 250 | # SQL Server files 251 | *.mdf 252 | *.ldf 253 | *.ndf 254 | 255 | # Business Intelligence projects 256 | *.rdl.data 257 | *.bim.layout 258 | *.bim_*.settings 259 | *.rptproj.rsuser 260 | *- Backup*.rdl 261 | 262 | # Microsoft Fakes 263 | FakesAssemblies/ 264 | 265 | # GhostDoc plugin setting file 266 | *.GhostDoc.xml 267 | 268 | # Node.js Tools for Visual Studio 269 | .ntvs_analysis.dat 270 | node_modules/ 271 | 272 | # Visual Studio 6 build log 273 | *.plg 274 | 275 | # Visual Studio 6 workspace options file 276 | *.opt 277 | 278 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 279 | *.vbw 280 | 281 | # Visual Studio LightSwitch build output 282 | **/*.HTMLClient/GeneratedArtifacts 283 | **/*.DesktopClient/GeneratedArtifacts 284 | **/*.DesktopClient/ModelManifest.xml 285 | **/*.Server/GeneratedArtifacts 286 | **/*.Server/ModelManifest.xml 287 | _Pvt_Extensions 288 | 289 | # Paket dependency manager 290 | .paket/paket.exe 291 | paket-files/ 292 | 293 | # FAKE - F# Make 294 | .fake/ 295 | 296 | # JetBrains Rider 297 | .idea/ 298 | *.sln.iml 299 | 300 | # CodeRush personal settings 301 | .cr/personal 302 | 303 | # Python Tools for Visual Studio (PTVS) 304 | __pycache__/ 305 | *.pyc 306 | 307 | # Cake - Uncomment if you are using it 308 | # tools/** 309 | # !tools/packages.config 310 | 311 | # Tabs Studio 312 | *.tss 313 | 314 | # Telerik's JustMock configuration file 315 | *.jmconfig 316 | 317 | # BizTalk build output 318 | *.btp.cs 319 | *.btm.cs 320 | *.odx.cs 321 | *.xsd.cs 322 | 323 | # OpenCover UI analysis results 324 | OpenCover/ 325 | 326 | # Azure Stream Analytics local run output 327 | ASALocalRun/ 328 | 329 | # MSBuild Binary and Structured Log 330 | *.binlog 331 | 332 | # NVidia Nsight GPU debugger configuration 
file 333 | *.nvuser 334 | 335 | # MFractors (Xamarin productivity tool) working folder 336 | .mfractor/ 337 | 338 | # Local History for Visual Studio 339 | .localhistory/ 340 | 341 | # BeatPulse healthcheck temp database 342 | healthchecksdb -------------------------------------------------------------------------------- /PatreonDownloader.Tests/PatreonCrawledUrlProcessorTests.cs: -------------------------------------------------------------------------------- 1 | using PatreonDownloader.Implementation; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.IO; 5 | using System.Linq; 6 | using System.Net; 7 | using System.Text; 8 | using System.Threading.Tasks; 9 | using PatreonDownloader.Implementation.Enums; 10 | using PatreonDownloader.Implementation.Models; 11 | using UniversalDownloaderPlatform.Common.Enums; 12 | using Xunit; 13 | 14 | namespace PatreonDownloader.Tests 15 | { 16 | public class PatreonCrawledUrlProcessorTests 17 | { 18 | [Fact] 19 | public async Task ProcessCrawledUrl_MediaFileNameIsUrl_IsTruncatedAndNoExtension() 20 | { 21 | PatreonDownloaderSettings settings = new PatreonDownloaderSettings 22 | { 23 | CookieContainer = new CookieContainer(), 24 | DownloadDirectory = "c:\\downloads\\UnitTesting", 25 | MaxDownloadRetries = 10, 26 | FileExistsAction = FileExistsAction.KeepExisting, 27 | RetryMultiplier = 1, 28 | SaveAvatarAndCover = true, 29 | SaveDescriptions = true, 30 | SaveEmbeds = true, 31 | SaveJson = true, 32 | IsUseSubDirectories = true, 33 | SubDirectoryPattern = "[%PostId%] %PublishedAt% %PostTitle%", 34 | MaxFilenameLength = 50 35 | }; 36 | 37 | PatreonCrawledUrl crawledUrl = new PatreonCrawledUrl 38 | { 39 | PostId = "123456", 40 | Title = "Test Post", 41 | PublishedAt = DateTime.Parse("07.07.2020 20:00:15"), 42 | Url = 
"https://www.patreon.com/media-u/Z0FBQUFBQmhXZDd3LXMwN0lJUFdVYTVIMEY1OGxzZTgwaFpQcW5TMk5WQVgxd2JVRFZvRXhjMjQ2V09oTW51eUpLQzIyOW1TdHRzYkY2Uk4yclAwX0VsSXBPMFZsNTBTcmZoaGx4OXJkR1Zham1CYl9fOWNVb3AzZGN1Wl9FMmNzcmIxc3hDek4xcHNuRV92LUVqQ0JESE4tcVBNYzlxYkRnWQ1=", 43 | Filename = "https://www.patreon.com/media-u/Z0FBQUFBQmhXZDd3a0xfckdEWmFrU0tjZHFUUkZfaDZ1OW92TjFVWFVDNk02c2FvS2FNczZxMS1rSVlaNUotX095dUNhdzJBSmYzMVpDV1luR1BYSXR6OVlZelpFOFFVektEcnpJT1plbElua2kwT1N2ZUMyU1NWaHV0eHQydWhnUXlmVWVLVDFYclBsSDBRaVJ3MDA5d2tzdDRZR3dtb3dBWQ1=", 44 | UrlType = PatreonCrawledUrlType.PostMedia 45 | }; 46 | 47 | PatreonCrawledUrlProcessor crawledUrlProcessor = new PatreonCrawledUrlProcessor(new PatreonRemoteFilenameRetriever()); 48 | await crawledUrlProcessor.BeforeStart(settings); 49 | await crawledUrlProcessor.ProcessCrawledUrl(crawledUrl); 50 | 51 | Assert.Equal(@"c:\downloads\UnitTesting\[123456] 2020-07-07 Test Post\media_https___www.patreon.com_media-u_Z0FBQUFBQmhX", crawledUrl.DownloadPath); 52 | } 53 | 54 | [Fact] 55 | public async Task ProcessCrawledUrl_MediaFileNameTooLong_IsTruncatedWithExtension() 56 | { 57 | PatreonDownloaderSettings settings = new PatreonDownloaderSettings 58 | { 59 | CookieContainer = new CookieContainer(), 60 | DownloadDirectory = "c:\\downloads\\UnitTesting", 61 | MaxDownloadRetries = 10, 62 | FileExistsAction = FileExistsAction.KeepExisting, 63 | RetryMultiplier = 1, 64 | SaveAvatarAndCover = true, 65 | SaveDescriptions = true, 66 | SaveEmbeds = true, 67 | SaveJson = true, 68 | IsUseSubDirectories = true, 69 | SubDirectoryPattern = "[%PostId%] %PublishedAt% %PostTitle%", 70 | MaxFilenameLength = 50 71 | }; 72 | 73 | PatreonCrawledUrl crawledUrl = new PatreonCrawledUrl 74 | { 75 | PostId = "123456", 76 | Title = "Test Post", 77 | PublishedAt = DateTime.Parse("07.07.2020 20:00:15"), 78 | Url = 
"https://www.patreon.com/media-u/Z0FBQUFBQmhXZDd3LXMwN0lJUFdVYTVIMEY1OGxzZTgwaFpQcW5TMk5WQVgxd2JVRFZvRXhjMjQ2V09oTW51eUpLQzIyOW1TdHRzYkY2Uk4yclAwX0VsSXBPMFZsNTBTcmZoaGx4OXJkR1Zham1CYl9fOWNVb3AzZGN1Wl9FMmNzcmIxc3hDek4xcHNuRV92LUVqQ0JESE4tcVBNYzlxYkRnWQ1=", 79 | Filename = "E0OarAVlc0iipzgUC7JdvBCf9fgSmbwk3xRDjRGByTM24SuMl6HkY1DIdGfcvnZhbTb978AHonnwqWNzMPEWBRQp007ateP9ByhB.png", 80 | UrlType = PatreonCrawledUrlType.PostFile 81 | }; 82 | 83 | PatreonCrawledUrlProcessor crawledUrlProcessor = new PatreonCrawledUrlProcessor(new PatreonRemoteFilenameRetriever()); 84 | await crawledUrlProcessor.BeforeStart(settings); 85 | await crawledUrlProcessor.ProcessCrawledUrl(crawledUrl); 86 | 87 | Assert.Equal(@"c:\downloads\UnitTesting\[123456] 2020-07-07 Test Post\post_E0OarAVlc0iipzgUC7JdvBCf9fgSmbwk3xRDjRGByTM24.png", crawledUrl.DownloadPath); 88 | } 89 | 90 | [Fact] 91 | public async Task ProcessCrawledUrl_PostMultipleFilesWithTheSameName_IdIsAppendedStartingWithSecondFile() 92 | { 93 | PatreonDownloaderSettings settings = new PatreonDownloaderSettings 94 | { 95 | CookieContainer = new CookieContainer(), 96 | DownloadDirectory = "c:\\downloads\\UnitTesting", 97 | MaxDownloadRetries = 10, 98 | FileExistsAction = FileExistsAction.KeepExisting, 99 | RetryMultiplier = 1, 100 | SaveAvatarAndCover = true, 101 | SaveDescriptions = true, 102 | SaveEmbeds = true, 103 | SaveJson = true, 104 | IsUseSubDirectories = true, 105 | SubDirectoryPattern = "[%PostId%] %PublishedAt% %PostTitle%", 106 | MaxFilenameLength = 50 107 | }; 108 | 109 | PatreonCrawledUrlProcessor crawledUrlProcessor = new PatreonCrawledUrlProcessor(new PatreonRemoteFilenameRetriever()); 110 | await crawledUrlProcessor.BeforeStart(settings); 111 | 112 | PatreonCrawledUrl crawledUrl = new PatreonCrawledUrl 113 | { 114 | PostId = "123456", 115 | Title = "Test Post", 116 | PublishedAt = DateTime.Parse("07.07.2020 20:00:15"), 117 | Url = 
"https://c10.patreonusercontent.com/4/patreon-media/p/post/123456/710deacb70e940d999bf2f3022e1e2f0/WAJhIjoxZZJwIjoxfQ%3D%3D/1.png?token-time=1661644800&token-hash=123", 118 | Filename = "1.png", 119 | UrlType = PatreonCrawledUrlType.PostMedia 120 | }; 121 | 122 | await crawledUrlProcessor.ProcessCrawledUrl(crawledUrl); 123 | 124 | Assert.Equal(@"c:\downloads\UnitTesting\[123456] 2020-07-07 Test Post\media_1.png", crawledUrl.DownloadPath); 125 | 126 | crawledUrl = new PatreonCrawledUrl 127 | { 128 | PostId = "123456", 129 | Title = "Test Post", 130 | PublishedAt = DateTime.Parse("07.07.2020 20:00:15"), 131 | Url = "https://c10.patreonusercontent.com/4/patreon-media/p/post/123456/110deacb70e940d999bf2f3022e1e2f0/WAJhIjoxZZJwIjoxfQ%3D%3D/1.png?token-time=1661644800&token-hash=123", 132 | Filename = "1.png", 133 | UrlType = PatreonCrawledUrlType.PostMedia 134 | }; 135 | 136 | await crawledUrlProcessor.ProcessCrawledUrl(crawledUrl); 137 | 138 | Assert.Equal(@"c:\downloads\UnitTesting\[123456] 2020-07-07 Test Post\media_1_110deacb70e940d999bf2f3022e1e2f0.png", crawledUrl.DownloadPath); 139 | 140 | crawledUrl = new PatreonCrawledUrl 141 | { 142 | PostId = "123456", 143 | Title = "Test Post", 144 | PublishedAt = DateTime.Parse("07.07.2020 20:00:15"), 145 | Url = "https://c10.patreonusercontent.com/4/2/patreon-media/p/post/123456/210deacb70e940d999bf2f3022e1e2f0/WAJhIjoxZZJwIjoxfQ%3D%3D/1.png?token-time=1661644800&token-hash=123", 146 | Filename = "1.png", 147 | UrlType = PatreonCrawledUrlType.PostMedia 148 | }; 149 | 150 | await crawledUrlProcessor.ProcessCrawledUrl(crawledUrl); 151 | 152 | Assert.Equal(@"c:\downloads\UnitTesting\[123456] 2020-07-07 Test Post\media_1_210deacb70e940d999bf2f3022e1e2f0.png", crawledUrl.DownloadPath); 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/PatreonCrawledUrlProcessor.cs: 
-------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Concurrent; 3 | using System.Collections.Generic; 4 | using System.IO; 5 | using System.Text; 6 | using System.Text.RegularExpressions; 7 | using System.Threading; 8 | using System.Threading.Tasks; 9 | using NLog; 10 | using PatreonDownloader.Implementation.Enums; 11 | using PatreonDownloader.Implementation.Interfaces; 12 | using PatreonDownloader.Implementation.Models; 13 | using UniversalDownloaderPlatform.Common.Enums; 14 | using UniversalDownloaderPlatform.Common.Exceptions; 15 | using UniversalDownloaderPlatform.Common.Helpers; 16 | using UniversalDownloaderPlatform.Common.Interfaces; 17 | using UniversalDownloaderPlatform.Common.Interfaces.Models; 18 | 19 | namespace PatreonDownloader.Implementation 20 | { 21 | class PatreonCrawledUrlProcessor : ICrawledUrlProcessor 22 | { 23 | private static readonly HashSet _invalidFilenameCharacters; 24 | 25 | private readonly IRemoteFilenameRetriever _remoteFilenameRetriever; 26 | private readonly Logger _logger = LogManager.GetCurrentClassLogger(); 27 | private readonly SemaphoreSlim _duplicateNamesCheckSemaphore; 28 | 29 | private Dictionary _fileCountDict; //file counter for duplicate check 30 | private PatreonDownloaderSettings _patreonDownloaderSettings; 31 | 32 | private static readonly Regex _fileIdRegex = new Regex( 33 | "https:\\/\\/(.+)\\.patreonusercontent\\.com\\/(.+)\\/patreon-media\\/p\\/post\\/([0-9]+)\\/([a-z0-9]+)", 34 | RegexOptions.Compiled | RegexOptions.IgnoreCase); //Regex used to retrieve file id from its url 35 | 36 | static PatreonCrawledUrlProcessor() 37 | { 38 | _invalidFilenameCharacters = new HashSet(Path.GetInvalidFileNameChars()); 39 | _invalidFilenameCharacters.Add(':'); 40 | } 41 | 42 | public PatreonCrawledUrlProcessor(IRemoteFilenameRetriever remoteFilenameRetriever) 43 | { 44 | _remoteFilenameRetriever = remoteFilenameRetriever ?? 
45 | throw new ArgumentNullException(nameof(remoteFilenameRetriever)); 46 | 47 | _duplicateNamesCheckSemaphore = new SemaphoreSlim(1, 1); 48 | 49 | _logger.Debug("KemonoCrawledUrlProcessor initialized"); 50 | } 51 | 52 | public async Task BeforeStart(IUniversalDownloaderPlatformSettings settings) 53 | { 54 | _fileCountDict = new Dictionary(); 55 | _patreonDownloaderSettings = (PatreonDownloaderSettings) settings; 56 | await _remoteFilenameRetriever.BeforeStart(settings); 57 | } 58 | 59 | public async Task ProcessCrawledUrl(ICrawledUrl udpCrawledUrl) 60 | { 61 | PatreonCrawledUrl crawledUrl = (PatreonCrawledUrl)udpCrawledUrl; 62 | 63 | if (crawledUrl.Url.IndexOf("youtube.com/watch?v=", StringComparison.Ordinal) != -1 || 64 | crawledUrl.Url.IndexOf("youtu.be/", StringComparison.Ordinal) != -1) 65 | { 66 | //TODO: YOUTUBE SUPPORT? 67 | _logger.Fatal($"[{crawledUrl.PostId}] [NOT SUPPORTED] YOUTUBE link found: {crawledUrl.Url}"); 68 | return false; 69 | } 70 | else if (crawledUrl.Url.IndexOf("imgur.com/", StringComparison.Ordinal) != -1) 71 | { 72 | //TODO: IMGUR SUPPORT 73 | _logger.Fatal($"[{crawledUrl.PostId}] [NOT SUPPORTED] IMGUR link found: {crawledUrl.Url}"); 74 | return false; 75 | } 76 | 77 | string filename = crawledUrl.Filename; 78 | 79 | if (!crawledUrl.IsProcessedByPlugin) 80 | { 81 | if (!_patreonDownloaderSettings.IsUseSubDirectories) 82 | filename = $"{crawledUrl.PostId}_"; 83 | else 84 | filename = ""; 85 | 86 | switch (crawledUrl.UrlType) 87 | { 88 | case PatreonCrawledUrlType.PostFile: 89 | filename += "post"; 90 | break; 91 | case PatreonCrawledUrlType.PostAttachment: 92 | filename += $"attachment"; 93 | if (!_patreonDownloaderSettings.IsUseLegacyFilenaming) 94 | filename += $"_{crawledUrl.FileId}"; 95 | break; 96 | case PatreonCrawledUrlType.PostMedia: 97 | filename += $"media"; 98 | if (!_patreonDownloaderSettings.IsUseLegacyFilenaming) 99 | filename += $"_{crawledUrl.FileId}"; 100 | break; 101 | case PatreonCrawledUrlType.AvatarFile: 102 | 
filename += "avatar"; 103 | break; 104 | case PatreonCrawledUrlType.CoverFile: 105 | filename += "cover"; 106 | break; 107 | case PatreonCrawledUrlType.ExternalUrl: 108 | filename += "external"; 109 | break; 110 | default: 111 | throw new ArgumentException($"Invalid url type: {crawledUrl.UrlType}"); 112 | } 113 | 114 | if (crawledUrl.Filename == null) 115 | { 116 | _logger.Debug($"No filename for {crawledUrl.Url}, trying to retrieve..."); 117 | string remoteFilename = 118 | await _remoteFilenameRetriever.RetrieveRemoteFileName(crawledUrl.Url); 119 | 120 | if (remoteFilename == null) 121 | { 122 | throw new DownloadException( 123 | $"[{crawledUrl.PostId}] Unable to retrieve name for external entry of type {crawledUrl.UrlType}: {crawledUrl.Url}"); 124 | } 125 | 126 | filename += $"_{remoteFilename}"; 127 | } 128 | else 129 | { 130 | filename += $"_{crawledUrl.Filename}"; 131 | } 132 | 133 | _logger.Debug($"Filename for {crawledUrl.Url} is {filename}"); 134 | 135 | _logger.Debug($"Sanitizing filename: {filename}"); 136 | filename = PathSanitizer.SanitizePath(filename); 137 | _logger.Debug($"Sanitized filename: {filename}"); 138 | 139 | if (filename.Length > _patreonDownloaderSettings.MaxFilenameLength) 140 | { 141 | _logger.Debug($"Filename is too long, will be truncated: {filename}"); 142 | string extension = Path.GetExtension(filename); 143 | if (extension.Length > 4) 144 | { 145 | _logger.Warn($"File extension for file {filename} is longer 4 characters and won't be appended to truncated filename!"); 146 | extension = ""; 147 | } 148 | filename = filename.Substring(0, _patreonDownloaderSettings.MaxFilenameLength) + extension; 149 | _logger.Debug($"Truncated filename: {filename}"); 150 | } 151 | 152 | string key = $"{crawledUrl.PostId}_{filename.ToLowerInvariant()}"; 153 | 154 | //Semaphore is required because of possible race condition between multiple threads 155 | await _duplicateNamesCheckSemaphore.WaitAsync(); 156 | 157 | int count = -1; 158 | try 159 | { 160 | 
if(_fileCountDict.ContainsKey(key)) 161 | _fileCountDict[key]++; 162 | else 163 | _fileCountDict[key] = 0; 164 | 165 | 166 | count = _fileCountDict[key]; 167 | } 168 | finally 169 | { 170 | _duplicateNamesCheckSemaphore.Release(); 171 | } 172 | 173 | if (count > 1) 174 | { 175 | _logger.Warn($"Found more than a single file with the name {filename} in the same folder in post {crawledUrl.PostId}, sequential number will be appended to its name."); 176 | 177 | string appendStr = count.ToString(); 178 | 179 | if (crawledUrl.UrlType != PatreonCrawledUrlType.ExternalUrl) 180 | { 181 | MatchCollection matches = _fileIdRegex.Matches(crawledUrl.Url); 182 | 183 | if (matches.Count == 0) 184 | throw new DownloadException($"[{crawledUrl.PostId}] Unable to retrieve file id for {crawledUrl.Url}, contact developer!"); 185 | if (matches.Count > 1) 186 | throw new DownloadException($"[{crawledUrl.PostId}] More than 1 media found in URL {crawledUrl.Url}"); 187 | 188 | appendStr = matches[0].Groups[4].Value; 189 | } 190 | 191 | filename = $"{Path.GetFileNameWithoutExtension(filename)}_{appendStr}{Path.GetExtension(filename)}"; 192 | } 193 | } 194 | 195 | string downloadDirectory = ""; 196 | 197 | if (_patreonDownloaderSettings.IsUseSubDirectories && 198 | crawledUrl.UrlType != PatreonCrawledUrlType.AvatarFile && 199 | crawledUrl.UrlType != PatreonCrawledUrlType.CoverFile) 200 | downloadDirectory = PostSubdirectoryHelper.CreateNameFromPattern(crawledUrl, _patreonDownloaderSettings.SubDirectoryPattern, _patreonDownloaderSettings.MaxSubdirectoryNameLength); 201 | 202 | crawledUrl.DownloadPath = !crawledUrl.IsProcessedByPlugin ? 
Path.Combine(downloadDirectory, filename) : downloadDirectory; 203 | 204 | return true; 205 | } 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /PatreonDownloader.App/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Linq; 3 | using System.Net; 4 | using System.Threading.Tasks; 5 | using CommandLine; 6 | using Microsoft.Extensions.Configuration; 7 | using NLog; 8 | using PatreonDownloader.App.Models; 9 | using PatreonDownloader.Implementation; 10 | using PatreonDownloader.Implementation.Models; 11 | using UniversalDownloaderPlatform.Common.Enums; 12 | using UniversalDownloaderPlatform.Common.Events; 13 | using UniversalDownloaderPlatform.Engine; 14 | 15 | namespace PatreonDownloader.App 16 | { 17 | class Program 18 | { 19 | private static readonly Logger _logger = LogManager.GetCurrentClassLogger(); 20 | private static UniversalDownloader _universalDownloader; 21 | private static IConfiguration _configuration; 22 | private static int _filesDownloaded; 23 | 24 | static async Task Main(string[] args) 25 | { 26 | _configuration = new ConfigurationBuilder() 27 | .AddJsonFile("settings.json", true, false) 28 | .Build(); 29 | 30 | NLogManager.ReconfigureNLog(); 31 | 32 | try 33 | { 34 | UpdateChecker updateChecker = new UpdateChecker(); 35 | (bool isUpdateAvailable, string updateMessage) = await updateChecker.IsNewVersionAvailable(); 36 | if (isUpdateAvailable) 37 | { 38 | _logger.Warn("New version is available at https://github.com/AlexCSDev/PatreonDownloader/releases"); 39 | if (updateMessage != null && !updateMessage.StartsWith("!")) 40 | _logger.Warn($"Note from developer: {updateMessage}"); 41 | } 42 | 43 | if (updateMessage != null && updateMessage.StartsWith("!")) 44 | _logger.Warn($"Note from developer: {updateMessage.Substring(1)}"); 45 | } 46 | catch (Exception ex) 47 | { 48 | _logger.Error($"Error encountered while checking for 
updates: {ex}", ex); 49 | } 50 | 51 | AppDomain.CurrentDomain.ProcessExit += CurrentDomain_ProcessExit; 52 | Console.CancelKeyPress += ConsoleOnCancelKeyPress; 53 | 54 | ParserResult parserResult = Parser.Default.ParseArguments(args); 55 | 56 | CommandLineOptions commandLineOptions = null; 57 | parserResult.WithParsed(options => 58 | { 59 | NLogManager.ReconfigureNLog(options.LogLevel, options.SaveLogs); 60 | commandLineOptions = options; 61 | }); 62 | 63 | if (commandLineOptions == null) 64 | return; 65 | 66 | try 67 | { 68 | await RunPatreonDownloader(commandLineOptions); 69 | } 70 | catch (Exception ex) 71 | { 72 | _logger.Fatal($"Fatal error, application will be closed: {ex}"); 73 | Environment.Exit(0); 74 | } 75 | } 76 | 77 | private static void ConsoleOnCancelKeyPress(object sender, ConsoleCancelEventArgs e) 78 | { 79 | _logger.Info("Cancellation requested"); 80 | Cleanup(); 81 | } 82 | 83 | private static void CurrentDomain_ProcessExit(object sender, EventArgs e) 84 | { 85 | _logger.Debug("Entered process exit"); 86 | Cleanup(); 87 | } 88 | 89 | private static void Cleanup() 90 | { 91 | _logger.Debug("Cleanup called"); 92 | if (_universalDownloader != null) 93 | { 94 | _logger.Debug("Disposing downloader..."); 95 | try 96 | { 97 | _universalDownloader.Dispose(); 98 | _universalDownloader = null; 99 | } 100 | catch (Exception ex) 101 | { 102 | _logger.Fatal($"Error during patreon downloader disposal! 
Exception: {ex}"); 103 | } 104 | } 105 | } 106 | 107 | private static async Task RunPatreonDownloader(CommandLineOptions commandLineOptions) 108 | { 109 | if (string.IsNullOrWhiteSpace(commandLineOptions.Url)) 110 | { 111 | _logger.Fatal("Creator url should be provided"); 112 | Environment.Exit(0); 113 | return; 114 | } 115 | 116 | _universalDownloader = new UniversalDownloader(new PatreonDownloaderModule()); 117 | 118 | _filesDownloaded = 0; 119 | 120 | _universalDownloader.StatusChanged += UniversalDownloaderOnStatusChanged; 121 | _universalDownloader.PostCrawlStart += UniversalDownloaderOnPostCrawlStart; 122 | //_patreonDownloader.PostCrawlEnd += UniversalDownloaderOnPostCrawlEnd; 123 | _universalDownloader.NewCrawledUrl += UniversalDownloaderOnNewCrawledUrl; 124 | _universalDownloader.CrawlerMessage += UniversalDownloaderOnCrawlerMessage; 125 | _universalDownloader.FileDownloaded += UniversalDownloaderOnFileDownloaded; 126 | 127 | PatreonDownloaderSettings settings = await InitializeSettings(commandLineOptions); 128 | await _universalDownloader.Download(commandLineOptions.Url, settings); 129 | 130 | _universalDownloader.StatusChanged -= UniversalDownloaderOnStatusChanged; 131 | _universalDownloader.PostCrawlStart -= UniversalDownloaderOnPostCrawlStart; 132 | //_universalDownloader.PostCrawlEnd -= UniversalDownloaderOnPostCrawlEnd; 133 | _universalDownloader.NewCrawledUrl -= UniversalDownloaderOnNewCrawledUrl; 134 | _universalDownloader.CrawlerMessage -= UniversalDownloaderOnCrawlerMessage; 135 | _universalDownloader.FileDownloaded -= UniversalDownloaderOnFileDownloaded; 136 | _universalDownloader.Dispose(); 137 | _universalDownloader = null; 138 | } 139 | 140 | private static async Task InitializeSettings(CommandLineOptions commandLineOptions) 141 | { 142 | if (!string.IsNullOrWhiteSpace(commandLineOptions.ProxyServerAddress) && 143 | !Uri.TryCreate(commandLineOptions.ProxyServerAddress, UriKind.Absolute, out _)) 144 | { 145 | throw new Exception($"Invalid 
proxy server address: {commandLineOptions.ProxyServerAddress}"); 146 | } 147 | /* Most values are copied straight from the command line options; the URL blacklist comes from configuration as a '|'-separated, lower-cased list. */ 148 | PatreonDownloaderSettings settings = new PatreonDownloaderSettings 149 | { 150 | UrlBlackList = (_configuration["UrlBlackList"] ?? "").ToLowerInvariant().Split("|", StringSplitOptions.RemoveEmptyEntries).ToList(), /* empty tokens are dropped so a missing or blank setting gives an empty list, not a list holding "" */ 151 | UserAgent = "Patreon/72.2.28 (Android; Android 14; Scale/2.10)", 152 | CookieContainer = null, 153 | SaveAvatarAndCover = commandLineOptions.SaveAvatarAndCover, 154 | SaveDescriptions = commandLineOptions.SaveDescriptions, 155 | SaveEmbeds = commandLineOptions.SaveEmbeds, 156 | SaveJson = commandLineOptions.SaveJson, 157 | DownloadDirectory = commandLineOptions.DownloadDirectory, 158 | FileExistsAction = commandLineOptions.FileExistsAction, 159 | IsCheckRemoteFileSize = !commandLineOptions.IsDisableRemoteFileSizeCheck, 160 | IsUseSubDirectories = commandLineOptions.UseSubDirectories, 161 | SubDirectoryPattern = commandLineOptions.SubDirectoryPattern, 162 | MaxSubdirectoryNameLength = commandLineOptions.MaxSubdirectoryNameLength, 163 | MaxFilenameLength = commandLineOptions.MaxFilenameLength, 164 | FallbackToContentTypeFilenames = commandLineOptions.FilenamesFallbackToContentType, 165 | ProxyServerAddress = commandLineOptions.ProxyServerAddress, 166 | IsUseLegacyFilenaming = commandLineOptions.IsUseLegacyFilenaming 167 | }; 168 | 169 | if (settings.IsUseLegacyFilenaming && (settings.FileExistsAction == FileExistsAction.BackupIfDifferent || settings.FileExistsAction == FileExistsAction.ReplaceIfDifferent)) 170 | throw new Exception("Legacy file naming cannot be used with BackupIfDifferent or ReplaceIfDifferent file exists action"); 171 | 172 | return settings; 173 | } 174 | /* CrawlerMessage handler: forwards the message to the logger at the level matching its type; an unknown type throws ArgumentOutOfRangeException. */ 175 | private static void UniversalDownloaderOnCrawlerMessage(object sender, CrawlerMessageEventArgs e) 176 | { 177 | switch (e.MessageType) 178 | { 179 | case CrawlerMessageType.Info: 180 | _logger.Info(e.Message); 181 | break; 182 | case CrawlerMessageType.Warning: 183 | _logger.Warn(e.Message); 184 | break; 185 | case 
CrawlerMessageType.Error: 186 | _logger.Error(e.Message); 187 | break; 188 | default: 189 | throw new ArgumentOutOfRangeException(); 190 | } 191 | } 192 | /* NewCrawledUrl handler: logs the friendly type name and the URL; the event payload is cast to PatreonCrawledUrl. */ 193 | private static void UniversalDownloaderOnNewCrawledUrl(object sender, NewCrawledUrlEventArgs e) 194 | { 195 | _logger.Info($" + {((PatreonCrawledUrl) e.CrawledUrl).UrlTypeAsFriendlyString}: {e.CrawledUrl.Url}"); 196 | } 197 | /* PostCrawlEnd handler: intentionally empty (its body is commented out) and its subscription in RunPatreonDownloader is commented out as well. */ 198 | private static void UniversalDownloaderOnPostCrawlEnd(object sender, PostCrawlEventArgs e) 199 | { 200 | /*if(!e.Success) 201 | _logger.Error($"Post cannot be parsed: {e.ErrorMessage}");*/ 202 | //_logger.Info(e.Success ? "✓" : "✗"); 203 | } 204 | /* PostCrawlStart handler: logs the id of the post being crawled. */ 205 | private static void UniversalDownloaderOnPostCrawlStart(object sender, PostCrawlEventArgs e) 206 | { 207 | _logger.Info($"-> {e.PostId}"); 208 | } 209 | /* FileDownloaded handler: bumps the processed-file counter for every event (success or failure) and logs either progress or the error. */ 210 | private static void UniversalDownloaderOnFileDownloaded(object sender, FileDownloadedEventArgs e) 211 | { 212 | _filesDownloaded++; 213 | if (e.Success) 214 | _logger.Info($"Downloaded {_filesDownloaded}/{e.TotalFiles}: {e.Url}"); 215 | else 216 | _logger.Error($"Failed to download {e.Url}: {e.ErrorMessage}"); 217 | } 218 | /* StatusChanged handler: logs a human-readable line per downloader status; Ready logs nothing and an unknown status throws ArgumentOutOfRangeException. */ 219 | private static void UniversalDownloaderOnStatusChanged(object sender, DownloaderStatusChangedEventArgs e) 220 | { 221 | switch (e.Status) 222 | { 223 | case DownloaderStatus.Ready: 224 | break; 225 | case DownloaderStatus.Initialization: 226 | _logger.Info("Preparing to download..."); 227 | break; 228 | case DownloaderStatus.RetrievingCampaignInformation: 229 | _logger.Info("Retrieving campaign information..."); 230 | break; 231 | case DownloaderStatus.Crawling: 232 | _logger.Info("Crawling..."); 233 | break; 234 | case DownloaderStatus.Downloading: 235 | _logger.Info("Downloading..."); 236 | break; 237 | case DownloaderStatus.Done: 238 | _logger.Info("Finished"); 239 | break; 240 | case DownloaderStatus.ExportingCrawlResults: 241 | _logger.Info("Exporting crawl results..."); 242 | break; 243 | default: 244 | throw new ArgumentOutOfRangeException(); 
245 | } 246 | } 247 | } 248 | } -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/Models/JSONObjects/Posts.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | using Newtonsoft.Json; 5 | 6 | // This file contains all classes used for representing deserialized json response of "posts" api endpoint 7 | namespace PatreonDownloader.Implementation.Models.JSONObjects.Posts 8 | { 9 | public class Embed 10 | { 11 | [JsonProperty("description")] 12 | public string Description { get; set; } 13 | [JsonProperty("html")] 14 | public object Html { get; set; } 15 | [JsonProperty("provider")] 16 | public string Provider { get; set; } 17 | [JsonProperty("provider_url")] 18 | public string ProviderUrl { get; set; } 19 | [JsonProperty("subject")] 20 | public string Subject { get; set; } 21 | [JsonProperty("url")] 22 | public string Url { get; set; } 23 | /* Renders the embed as labelled text (provider, subject, url, description, html), each entry followed by a blank line; the crawler writes this text to embed.txt. */ 24 | public override string ToString() 25 | { 26 | StringBuilder sb = new StringBuilder(); 27 | sb.Append($"Provider: {Provider}, Provider URL: {ProviderUrl}"); 28 | sb.Append(Environment.NewLine); 29 | sb.Append(Environment.NewLine); 30 | sb.Append($"Subject: {Subject}"); 31 | sb.Append(Environment.NewLine); 32 | sb.Append(Environment.NewLine); 33 | sb.Append($"Url: {Url}"); 34 | sb.Append(Environment.NewLine); 35 | sb.Append(Environment.NewLine); 36 | sb.Append($"Description: {Description}"); 37 | sb.Append(Environment.NewLine); 38 | sb.Append(Environment.NewLine); 39 | sb.Append($"Html: {Html}"); 40 | sb.Append(Environment.NewLine); 41 | sb.Append(Environment.NewLine); 42 | return sb.ToString(); 43 | } 44 | } 45 | public class Image 46 | { 47 | [JsonProperty("height")] 48 | public int? 
Height { get; set; } 49 | [JsonProperty("large_url")] 50 | public string LargeUrl { get; set; } 51 | [JsonProperty("thumb_url")] 52 | public string ThumbUrl { get; set; } 53 | [JsonProperty("url")] 54 | public string Url { get; set; } 55 | [JsonProperty("width")] 56 | public int? Width { get; set; } 57 | } 58 | /* "post_file" attribute of a post: the file name and the URL it is served from. */ 59 | public class PostFile 60 | { 61 | [JsonProperty("name")] 62 | public string Name { get; set; } 63 | [JsonProperty("url")] 64 | public string Url { get; set; } 65 | } 66 | /* "attributes" object of a post entry; property names map one-to-one to the JSON keys in the JsonProperty attributes. */ 67 | public class RootDataAttributes 68 | { 69 | [JsonProperty("change_visibility_at")] 70 | public object ChangeVisibilityAt { get; set; } 71 | [JsonProperty("comment_count")] 72 | public int? CommentCount { get; set; } 73 | [JsonProperty("content")] 74 | public string Content { get; set; } 75 | [JsonProperty("current_user_can_delete")] 76 | public bool CurrentUserCanDelete { get; set; } 77 | [JsonProperty("current_user_can_view")] 78 | public bool CurrentUserCanView { get; set; } 79 | [JsonProperty("current_user_has_liked")] 80 | public bool CurrentUserHasLinked { get; set; } 81 | [JsonProperty("embed")] 82 | public Embed Embed { get; set; } 83 | [JsonProperty("image")] 84 | public Image Image { get; set; } 85 | [JsonProperty("is_paid")] 86 | public bool IsPaid { get; set; } 87 | [JsonProperty("like_count")] 88 | public int? LikeCount { get; set; } 89 | [JsonProperty("min_cents_pledged_to_view")] 90 | public int? MinCentsPledgedToView { get; set; } 91 | [JsonProperty("patreon_url")] 92 | public string PatreonUrl { get; set; } 93 | [JsonProperty("patron_count")] 94 | public int? 
PatronCount { get; set; } 95 | [JsonProperty("pledge_url")] 96 | public string PledgeUrl { get; set; } 97 | [JsonProperty("post_file")] 98 | public PostFile PostFile { get; set; } 99 | [JsonProperty("post_metadata")] 100 | public object PostMetadata { get; set; } 101 | [JsonProperty("post_type")] 102 | public string PostType { get; set; } 103 | [JsonProperty("published_at")] 104 | public DateTime PublishedAt { get; set; } 105 | [JsonProperty("teaser_text")] 106 | public object TeaserText { get; set; } 107 | [JsonProperty("title")] 108 | public string Title { get; set; } 109 | [JsonProperty("upgrade_url")] 110 | public string UpgradeUrl { get; set; } 111 | [JsonProperty("url")] 112 | public string Url { get; set; } 113 | [JsonProperty("was_posted_by_campaign_owner")] 114 | public bool WasPostedByCampaignOwner { get; set; } 115 | } 116 | /* "access_rules" relationship. NOTE(review): element type restored as the id/type reference class Data; this relationship is not read in the visible code - confirm. */ 117 | public class AccessRules 118 | { 119 | [JsonProperty("data")] 120 | public List<Data> Data { get; set; } 121 | } 122 | /* "attachments_media" relationship: id/type references; the page crawler reads Id and Type of each element. */ 123 | public class AttachmentsMedia 124 | { 125 | [JsonProperty("data")] 126 | public List<Data> Data { get; set; } 127 | } 128 | /* "audio" relationship; payload kept untyped. */ 129 | public class Audio 130 | { 131 | [JsonProperty("data")] 132 | public object Data { get; set; } 133 | } 134 | /* An id/type reference to another resource; IdInt64 converts the string id on access and throws if it is not a valid 64-bit integer. */ 135 | public class Data 136 | { 137 | [JsonProperty("id")] 138 | public string Id { get; set; } 139 | [JsonProperty("type")] 140 | public string Type { get; set; } 141 | 142 | public long IdInt64 => Convert.ToInt64(Id); //todo: map property directly to int64? 
143 | } 144 | /* "links" object of the campaign relationship. */ 145 | public class CampaignLinks 146 | { 147 | [JsonProperty("related")] 148 | public string Related { get; set; } 149 | } 150 | /* "campaign" relationship: a single id/type reference plus its links. */ 151 | public class Campaign 152 | { 153 | [JsonProperty("data")] 154 | public Data Data { get; set; } 155 | [JsonProperty("links")] 156 | public CampaignLinks Links { get; set; } 157 | } 158 | /* "images" relationship: id/type references; the page crawler reads Id and Type of each element. */ 159 | public class Images 160 | { 161 | [JsonProperty("data")] 162 | public List<Data> Data { get; set; } 163 | } 164 | /* "poll" relationship; payload kept untyped. */ 165 | public class Poll 166 | { 167 | [JsonProperty("data")] 168 | public object Data { get; set; } 169 | } 170 | /* "links" object of the user relationship. */ 171 | public class UserLinks 172 | { 173 | [JsonProperty("related")] 174 | public string Related { get; set; } 175 | } 176 | /* "user" relationship: a single id/type reference plus its links. */ 177 | public class User 178 | { 179 | [JsonProperty("data")] 180 | public Data Data { get; set; } 181 | [JsonProperty("links")] 182 | public UserLinks Links { get; set; } 183 | } 184 | /* "user_defined_tags" relationship. NOTE(review): element type restored as the id/type reference class Data; this relationship is not read in the visible code - confirm. */ 185 | public class UserDefinedTags 186 | { 187 | [JsonProperty("data")] 188 | public List<Data> Data { get; set; } 189 | } 190 | /* "relationships" object of a post entry. */ 191 | public class RootDataRelationships 192 | { 193 | [JsonProperty("access_rules")] 194 | public AccessRules AccessRules { get; set; } 195 | [JsonProperty("attachments_media")] 196 | public AttachmentsMedia AttachmentsMedia { get; set; } 197 | [JsonProperty("audio")] 198 | public Audio Audio { get; set; } 199 | [JsonProperty("campaign")] 200 | public Campaign Campaign { get; set; } 201 | [JsonProperty("images")] 202 | public Images Images { get; set; } 203 | [JsonProperty("poll")] 204 | public Poll Poll { get; set; } 205 | [JsonProperty("user")] 206 | public User User { get; set; } 207 | [JsonProperty("user_defined_tags")] 208 | public UserDefinedTags UserDefinedTags { get; set; } 209 | } 210 | /* One entry of the top-level "data" array: id, type, attributes and relationships. */ 211 | public class RootData 212 | { 213 | [JsonProperty("attributes")] 214 | public RootDataAttributes Attributes { get; set; } 215 | [JsonProperty("id")] 216 | public string Id { get; set; } 217 | [JsonProperty("relationships")] 218 | public RootDataRelationships Relationships { get; set; } 219 
| [JsonProperty("type")] 220 | public string Type { get; set; } 221 | } 222 | /* "image_urls" object of a media entry: default, original and thumbnail URLs. */ 223 | public class ImageUrls 224 | { 225 | [JsonProperty("default")] 226 | public string Default { get; set; } 227 | [JsonProperty("original")] 228 | public string Original { get; set; } 229 | [JsonProperty("thumbnail")] 230 | public string Thumbnail { get; set; } 231 | } 232 | /* "dimensions" object: JSON keys "h" and "w" mapped to Height and Width. */ 233 | public class Dimensions 234 | { 235 | [JsonProperty("h")] 236 | public int? Height { get; set; } 237 | [JsonProperty("w")] 238 | public int? Width { get; set; } 239 | } 240 | /* "metadata" object of a media entry; only the dimensions are mapped. */ 241 | public class Metadata 242 | { 243 | [JsonProperty("dimensions")] 244 | public Dimensions Dimensions { get; set; } 245 | } 246 | /* "attributes" object shared by all "included" entries; a single class carries the keys of several entry types, so most properties are null for any given entry. */ 247 | public class IncludedAttributes 248 | { 249 | [JsonProperty("full_name")] 250 | public string FullName { get; set; } 251 | [JsonProperty("image_url")] 252 | public string ImageUrl { get; set; } 253 | [JsonProperty("url")] 254 | public string Url { get; set; } 255 | [JsonProperty("avatar_photo_url")] 256 | public string AvatarPhotoUrl { get; set; } 257 | [JsonProperty("earnings_visibility")] 258 | public string EarningsVisibility { get; set; } 259 | [JsonProperty("is_monthly")] 260 | public bool? IsMonthly { get; set; } 261 | [JsonProperty("is_nsfw")] 262 | public bool? IsNsfw { get; set; } 263 | [JsonProperty("name")] 264 | public string Name { get; set; } 265 | [JsonProperty("show_audio_post_download_links")] 266 | public bool? 
ShowAudioPostDownloadLinks { get; set; } 267 | [JsonProperty("download_url")] 268 | public string DownloadUrl { get; set; } 269 | [JsonProperty("file_name")] 270 | public string FileName { get; set; } 271 | [JsonProperty("image_urls")] 272 | public ImageUrls ImageUrls { get; set; } 273 | [JsonProperty("metadata")] 274 | public Metadata Metadata { get; set; } 275 | [JsonProperty("access_rule_type")] 276 | public string AccessRuleType { get; set; } 277 | [JsonProperty("amount_cents")] 278 | public object AmountCents { get; set; } 279 | [JsonProperty("post_count")] 280 | public int? PostCount { get; set; } 281 | } 282 | /* "tier" relationship of an included entry; payload kept untyped. */ 283 | public class Tier 284 | { 285 | [JsonProperty("data")] 286 | public object Data { get; set; } 287 | } 288 | /* "relationships" object of an included entry. */ 289 | public class IncludedRelationships 290 | { 291 | [JsonProperty("tier")] 292 | public Tier Tier { get; set; } 293 | } 294 | /* One entry of the top-level "included" array; IdInt64 converts the string id on access and throws if it is not a valid 64-bit integer. */ 295 | public class Included 296 | { 297 | [JsonProperty("attributes")] 298 | public IncludedAttributes Attributes { get; set; } 299 | [JsonProperty("id")] 300 | public string Id { get; set; } 301 | [JsonProperty("type")] 302 | public string Type { get; set; } 303 | [JsonProperty("relationships")] 304 | public IncludedRelationships Relationships { get; set; } 305 | 306 | public long IdInt64 => Convert.ToInt64(Id); //todo: map property directly to int64? 307 | } 308 | /* Top-level "links" object; Next is the URL of the next page and is what the crawler follows. */ 309 | public class RootLinks 310 | { 311 | [JsonProperty("next")] 312 | public string Next { get; set; } 313 | } 314 | /* "cursors" object inside the pagination metadata. */ 315 | public class Cursors 316 | { 317 | [JsonProperty("next")] 318 | public string Next { get; set; } 319 | } 320 | /* "pagination" object: cursors and the total count. */ 321 | public class Pagination 322 | { 323 | [JsonProperty("cursors")] 324 | public Cursors Cursors { get; set; } 325 | [JsonProperty("total")] 326 | public int? 
Total { get; set; } 327 | } 328 | /* Top-level "meta" object; only pagination is mapped. */ 329 | public class Meta 330 | { 331 | [JsonProperty("pagination")] 332 | public Pagination Pagination { get; set; } 333 | } 334 | /* Root of the "posts" response: the post entries, the side-loaded "included" entries, paging links and metadata. */ 335 | public class Root 336 | { 337 | [JsonProperty("data")] 338 | public List<RootData> Data { get; set; } 339 | [JsonProperty("included")] 340 | public List<Included> Included { get; set; } 341 | [JsonProperty("links")] 342 | public RootLinks Links { get; set; } 343 | 344 | [JsonProperty("meta")] 345 | public Meta Meta { get; set; } 346 | } 347 | } 348 | -------------------------------------------------------------------------------- /PatreonDownloader.Implementation/PatreonPageCrawler.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Linq; 5 | using System.Threading.Tasks; 6 | using Newtonsoft.Json; 7 | using NLog; 8 | using PatreonDownloader.Implementation.Enums; 9 | using PatreonDownloader.Implementation.Models; 10 | using PatreonDownloader.Implementation.Models.JSONObjects.Posts; 11 | using UniversalDownloaderPlatform.Common.Enums; 12 | using UniversalDownloaderPlatform.Common.Events; 13 | using UniversalDownloaderPlatform.Common.Interfaces; 14 | using UniversalDownloaderPlatform.Common.Interfaces.Models; 15 | using UniversalDownloaderPlatform.DefaultImplementations.Models; 16 | 17 | namespace PatreonDownloader.Implementation 18 | { 19 | internal sealed class PatreonPageCrawler : IPageCrawler 20 | { 21 | private readonly IWebDownloader _webDownloader; 22 | private readonly IPluginManager _pluginManager; 23 | private readonly Logger _logger = LogManager.GetCurrentClassLogger(); 24 | 25 | private PatreonDownloaderSettings _patreonDownloaderSettings; 26 | 27 | public event EventHandler PostCrawlStart; 28 | public event EventHandler PostCrawlEnd; 29 | public event EventHandler NewCrawledUrl; 30 | public event EventHandler CrawlerMessage; 31 | 32 | //TODO: Research possibility of not hardcoding 
this string 33 | private const string CrawlStartUrl = "https://www.patreon.com/api/posts?" + 34 | "include=user%2Cattachments_media%2Ccampaign%2Cpoll.choices%2Cpoll.current_user_responses.user%2Cpoll.current_user_responses.choice%2Cpoll.current_user_responses.poll%2Caccess_rules.tier.null%2Cimages.null%2Caudio.null" + 35 | "&fields[post]=change_visibility_at%2Ccomment_count%2Ccontent%2Ccurrent_user_can_delete%2Ccurrent_user_can_view%2Ccurrent_user_has_liked%2Cembed%2Cimage%2Cis_paid%2Clike_count%2Cmin_cents_pledged_to_view%2Cpost_file%2Cpost_metadata%2Cpublished_at%2Cpatron_count%2Cpatreon_url%2Cpost_type%2Cpledge_url%2Cthumbnail_url%2Cteaser_text%2Ctitle%2Cupgrade_url%2Curl%2Cwas_posted_by_campaign_owner" + 36 | "&fields[user]=image_url%2Cfull_name%2Curl" + 37 | "&fields[campaign]=show_audio_post_download_links%2Cavatar_photo_url%2Cearnings_visibility%2Cis_nsfw%2Cis_monthly%2Cname%2Curl" + 38 | "&fields[access_rule]=access_rule_type%2Camount_cents" + 39 | "&fields[media]=id%2Cimage_urls%2Cdownload_url%2Cmetadata%2Cfile_name" + 40 | "&sort=-published_at" + 41 | "&filter[is_draft]=false&filter[contains_exclusive_posts]=true&json-api-use-default-includes=false&json-api-version=1.0"; 42 | /* Stores the injected dependencies; a null argument throws ArgumentNullException naming the offending parameter. The campaign id filter is appended to CrawlStartUrl by Crawl. */ 43 | public PatreonPageCrawler(IWebDownloader webDownloader, IPluginManager pluginManager) 44 | { 45 | _webDownloader = webDownloader ?? throw new ArgumentNullException(nameof(webDownloader)); 46 | _pluginManager = pluginManager ?? 
throw new ArgumentNullException(nameof(pluginManager)); 47 | } 48 | /* Captures the run settings; the cast throws InvalidCastException if the platform passes a settings object that is not PatreonDownloaderSettings. */ 49 | public async Task BeforeStart(IUniversalDownloaderPlatformSettings settings) 50 | { 51 | _patreonDownloaderSettings = (PatreonDownloaderSettings) settings; 52 | } 53 | /* Walks the posts API page by page for the given campaign and returns every crawled URL; throws ArgumentException for an id below 1 or an empty name. NOTE(review): the stripped generic arguments are restored as List<ICrawledUrl> - confirm against the IPageCrawler interface. */ 54 | public async Task<List<ICrawledUrl>> Crawl(ICrawlTargetInfo crawlTargetInfo) 55 | { 56 | PatreonCrawlTargetInfo patreonCrawlTargetInfo = (PatreonCrawlTargetInfo)crawlTargetInfo; 57 | if (patreonCrawlTargetInfo.Id < 1) 58 | throw new ArgumentException("Campaign ID cannot be less than 1"); 59 | if (string.IsNullOrEmpty(patreonCrawlTargetInfo.Name)) 60 | throw new ArgumentException("Campaign name cannot be null or empty"); 61 | 62 | _logger.Debug($"Starting crawling campaign {patreonCrawlTargetInfo.Name}"); 63 | List<ICrawledUrl> crawledUrls = new List<ICrawledUrl>(); 64 | Random rnd = new Random(Guid.NewGuid().GetHashCode()); 65 | 66 | if (_patreonDownloaderSettings.SaveAvatarAndCover) 67 | { 68 | _logger.Debug("Adding avatar and cover..."); 69 | if(!string.IsNullOrWhiteSpace(patreonCrawlTargetInfo.AvatarUrl)) 70 | crawledUrls.Add(new PatreonCrawledUrl { PostId = "0", Url = patreonCrawlTargetInfo.AvatarUrl, UrlType = PatreonCrawledUrlType.AvatarFile }); 71 | if (!string.IsNullOrWhiteSpace(patreonCrawlTargetInfo.CoverUrl)) 72 | crawledUrls.Add(new PatreonCrawledUrl { PostId = "0", Url = patreonCrawlTargetInfo.CoverUrl, UrlType = PatreonCrawledUrlType.CoverFile }); 73 | } 74 | 75 | string nextPage = CrawlStartUrl + $"&filter[campaign_id]={patreonCrawlTargetInfo.Id}"; 76 | 77 | int page = 0; 78 | while (!string.IsNullOrEmpty(nextPage)) 79 | { 80 | page++; 81 | _logger.Debug($"Page #{page}: {nextPage}"); 82 | string json = await _webDownloader.DownloadString(nextPage); 83 | 84 | if(_patreonDownloaderSettings.SaveJson) 85 | await File.WriteAllTextAsync(Path.Combine(_patreonDownloaderSettings.DownloadDirectory, $"page_{page}.json"), 86 | json); 87 | 88 | ParsingResult result = await ParsePage(json); 89 | 90 | if(result.CrawledUrls.Count > 0) 91 | 
crawledUrls.AddRange(result.CrawledUrls); 92 | 93 | nextPage = result.NextPage; 94 | 95 | await Task.Delay(500 * rnd.Next(1, 3)); //0.5 - 1 second delay 96 | } 97 | 98 | _logger.Debug("Finished crawl"); 99 | 100 | return crawledUrls; 101 | } 102 | /* Parses one page of the posts API: emits crawl events, collects the download entries of each viewable post and returns them with the next page URL. NOTE(review): the element type of crawledUrls is restored as PatreonCrawledUrl - confirm against ParsingResult.CrawledUrls. */ 103 | private async Task<ParsingResult> ParsePage(string json) 104 | { 105 | List<PatreonCrawledUrl> crawledUrls = new List<PatreonCrawledUrl>(); 106 | List<string> skippedIncludesList = new List<string>(); //List for all included data which current account doesn't have access to 107 | 108 | Root jsonRoot = JsonConvert.DeserializeObject<Root>(json); 109 | 110 | _logger.Debug("Parsing data entries..."); 111 | foreach (var jsonEntry in jsonRoot.Data) 112 | { 113 | OnPostCrawlStart(new PostCrawlEventArgs(jsonEntry.Id, true)); 114 | _logger.Info($"-> {jsonEntry.Id}"); 115 | if (jsonEntry.Type != "post") 116 | { 117 | string msg = $"Invalid type for \"data\": {jsonEntry.Type}, skipping"; 118 | _logger.Error($"[{jsonEntry.Id}] {msg}"); 119 | OnPostCrawlEnd(new PostCrawlEventArgs(jsonEntry.Id, false, msg)); 120 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Error, msg, jsonEntry.Id)); 121 | continue; 122 | } 123 | 124 | _logger.Debug($"[{jsonEntry.Id}] Is a post"); 125 | if (!jsonEntry.Attributes.CurrentUserCanView) 126 | { 127 | _logger.Warn($"[{jsonEntry.Id}] Current user cannot view this post"); 128 | /* a relationship can be present with a null "data" payload (the scans further down check Data for null), so Data itself is null-guarded too */ 129 | string[] skippedAttachments = jsonEntry.Relationships.AttachmentsMedia?.Data?.Select(x => x.Id).ToArray() ?? new string[0]; 130 | string[] skippedMedia = jsonEntry.Relationships.Images?.Data?.Select(x => x.Id).ToArray() ?? 
new string[0]; 131 | _logger.Debug($"[{jsonEntry.Id}] Adding {skippedAttachments.Length} attachments and {skippedMedia.Length} media items to skipped list"); 132 | /* remembered so the verification pass at the end of ParsePage does not warn about includes that belong to inaccessible posts */ 133 | skippedIncludesList.AddRange(skippedAttachments); 134 | skippedIncludesList.AddRange(skippedMedia); 135 | 136 | OnPostCrawlEnd(new PostCrawlEventArgs(jsonEntry.Id, false, "Current user cannot view this post")); 137 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Warning, "Current user cannot view this post", jsonEntry.Id)); 138 | continue; 139 | } 140 | /* template entry for this post; every URL found below is added as a clone of it with Url/UrlType filled in */ 141 | PatreonCrawledUrl entry = new PatreonCrawledUrl 142 | { 143 | PostId = jsonEntry.Id, 144 | Title = jsonEntry.Attributes.Title, 145 | PublishedAt = jsonEntry.Attributes.PublishedAt 146 | }; 147 | /* description/embed files go to the download root, or to a per-post subdirectory (created on demand) when subdirectories are enabled and something will actually be written */ 148 | string additionalFilesSaveDirectory = _patreonDownloaderSettings.DownloadDirectory; 149 | if (_patreonDownloaderSettings.IsUseSubDirectories && 150 | (_patreonDownloaderSettings.SaveDescriptions || 151 | (jsonEntry.Attributes.Embed != null && _patreonDownloaderSettings.SaveEmbeds) 152 | ) 153 | ) 154 | { 155 | additionalFilesSaveDirectory = Path.Combine(_patreonDownloaderSettings.DownloadDirectory, 156 | PostSubdirectoryHelper.CreateNameFromPattern(entry, _patreonDownloaderSettings.SubDirectoryPattern, _patreonDownloaderSettings.MaxSubdirectoryNameLength)); 157 | if (!Directory.Exists(additionalFilesSaveDirectory)) 158 | Directory.CreateDirectory(additionalFilesSaveDirectory); 159 | } 160 | /* best effort: a failure to write the description is logged and reported as a crawler message, and parsing of the post continues */ 161 | if (_patreonDownloaderSettings.SaveDescriptions) 162 | { 163 | try 164 | { 165 | string filename = "description.html"; 166 | if (!_patreonDownloaderSettings.IsUseSubDirectories) 167 | filename = $"{jsonEntry.Id}_{filename}"; 168 | 169 | await File.WriteAllTextAsync(Path.Combine(additionalFilesSaveDirectory, filename), 170 | jsonEntry.Attributes.Content); 171 | } 172 | catch (Exception ex) 173 | { 174 | string msg = $"Unable to save description: {ex}"; 175 | _logger.Error($"[{jsonEntry.Id}] {msg}"); 176 | OnCrawlerMessage(new 
CrawlerMessageEventArgs(CrawlerMessageType.Error, msg, jsonEntry.Id)); 177 | } 178 | } 179 | /* an embed always yields an external-url entry; its metadata file is written only when SaveEmbeds is on */ 180 | if (jsonEntry.Attributes.Embed != null) 181 | { 182 | if (_patreonDownloaderSettings.SaveEmbeds) 183 | { 184 | _logger.Debug($"[{jsonEntry.Id}] Embed found, metadata will be saved"); 185 | try 186 | { 187 | string filename = "embed.txt"; 188 | if (!_patreonDownloaderSettings.IsUseSubDirectories) 189 | filename = $"{jsonEntry.Id}_{filename}"; 190 | 191 | await File.WriteAllTextAsync( 192 | Path.Combine(additionalFilesSaveDirectory, filename), 193 | jsonEntry.Attributes.Embed.ToString()); 194 | } 195 | catch (Exception ex) 196 | { 197 | string msg = $"Unable to save embed metadata: {ex}"; 198 | _logger.Error($"[{jsonEntry.Id}] {msg}"); 199 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Error, msg, 200 | jsonEntry.Id)); 201 | } 202 | } 203 | 204 | PatreonCrawledUrl subEntry = (PatreonCrawledUrl)entry.Clone(); 205 | subEntry.Url = jsonEntry.Attributes.Embed.Url; 206 | subEntry.UrlType = PatreonCrawledUrlType.ExternalUrl; 207 | crawledUrls.Add(subEntry); 208 | _logger.Info( 209 | $"[{jsonEntry.Id}] New embed entry: {subEntry.Url}"); 210 | 211 | OnNewCrawledUrl(new NewCrawledUrlEventArgs((CrawledUrl)subEntry.Clone())); 212 | } 213 | 214 | //External urls via plugins (including direct via default plugin) 215 | List<string> pluginUrls = await _pluginManager.ExtractSupportedUrls(jsonEntry.Attributes.Content); 216 | foreach (string url in pluginUrls) 217 | { 218 | PatreonCrawledUrl subEntry = (PatreonCrawledUrl)entry.Clone(); 219 | subEntry.Url = url; 220 | subEntry.UrlType = PatreonCrawledUrlType.ExternalUrl; 221 | crawledUrls.Add(subEntry); 222 | _logger.Info($"[{jsonEntry.Id}] New external entry: {subEntry.Url}"); 223 | OnNewCrawledUrl(new NewCrawledUrlEventArgs((CrawledUrl)subEntry.Clone())); 224 | } 225 | 226 | _logger.Debug($"[{jsonEntry.Id}] Scanning attachment data"); 227 | //Attachments 228 | if(jsonEntry.Relationships.AttachmentsMedia?.Data != null) 229 | { 
230 | foreach (var attachment in jsonEntry.Relationships.AttachmentsMedia.Data) 231 | { 232 | _logger.Debug($"[{jsonEntry.Id} A-{attachment.Id}] Scanning attachment"); 233 | if (attachment.Type != "media") //sanity check 234 | { 235 | string msg = $"Invalid attachment type for {attachment.Id}!!!"; 236 | _logger.Fatal($"[{jsonEntry.Id}] {msg}"); 237 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Error, msg, jsonEntry.Id)); 238 | continue; 239 | } 240 | /* the relationship only carries an id/type reference; file name and download url live in the matching "included" media entry */ 241 | var attachmentData = jsonRoot.Included.FirstOrDefault(x => x.Type == "media" && x.Id == attachment.Id); 242 | 243 | if (attachmentData == null) 244 | { 245 | string msg = $"Attachment data not found for {attachment.Id}!!!"; 246 | _logger.Fatal($"[{jsonEntry.Id}] {msg}"); 247 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Error, msg, jsonEntry.Id)); 248 | continue; 249 | } 250 | 251 | PatreonCrawledUrl subEntry = (PatreonCrawledUrl)entry.Clone(); ; 252 | subEntry.Url = attachmentData.Attributes.DownloadUrl; 253 | subEntry.Filename = attachmentData.Attributes.FileName; 254 | subEntry.UrlType = PatreonCrawledUrlType.PostAttachment; 255 | subEntry.FileId = attachmentData.Id; 256 | crawledUrls.Add(subEntry); 257 | _logger.Info($"[{jsonEntry.Id} A-{attachment.Id}] New attachment entry: {subEntry.Url}"); 258 | OnNewCrawledUrl(new NewCrawledUrlEventArgs((CrawledUrl)subEntry.Clone())); 259 | } 260 | } 261 | /* same resolution scheme as attachments, applied to the post's "images" relationship */ 262 | _logger.Debug($"[{jsonEntry.Id}] Scanning media data"); 263 | //Media 264 | if (jsonEntry.Relationships.Images?.Data != null) 265 | { 266 | foreach (var image in jsonEntry.Relationships.Images.Data) 267 | { 268 | _logger.Debug($"[{jsonEntry.Id} M-{image.Id}] Scanning media"); 269 | if (image.Type != "media") //sanity check 270 | { 271 | string msg = $"invalid media type for {image.Id}!!!"; 272 | _logger.Fatal($"[{jsonEntry.Id}] {msg}"); 273 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Error, msg, jsonEntry.Id)); 274 | continue; 275 | } 276 | 277 | var 
imageData = jsonRoot.Included.FirstOrDefault(x => x.Type == "media" && x.Id == image.Id); 278 | 279 | if (imageData == null) 280 | { 281 | string msg = $"media data not found for {image.Id}!!!"; 282 | _logger.Fatal($"[{jsonEntry.Id}] {msg}"); 283 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Error, msg, jsonEntry.Id)); 284 | continue; 285 | } 286 | 287 | _logger.Debug($"[{jsonEntry.Id} M-{image.Id}] Searching for download url"); 288 | string downloadUrl = imageData.Attributes.DownloadUrl; 289 | 290 | _logger.Debug($"[{jsonEntry.Id} M-{image.Id}] Download url is: {downloadUrl}"); 291 | /* NOTE(review): downloadUrl is not checked for null/empty before the entry is added - confirm downstream handling */ 292 | PatreonCrawledUrl subEntry = (PatreonCrawledUrl)entry.Clone(); ; 293 | subEntry.Url = downloadUrl; 294 | subEntry.Filename = imageData.Attributes.FileName; 295 | subEntry.UrlType = PatreonCrawledUrlType.PostMedia; 296 | subEntry.FileId = image.Id; 297 | crawledUrls.Add(subEntry); 298 | _logger.Info($"[{jsonEntry.Id} M-{image.Id}] New media entry from {subEntry.Url}"); 299 | OnNewCrawledUrl(new NewCrawledUrlEventArgs((CrawledUrl)subEntry.Clone())); 300 | } 301 | } 302 | /* the post's own file: post_file wins, otherwise the image attribute is used (large_url preferred over url) */ 303 | _logger.Debug($"[{jsonEntry.Id}] Parsing base entry"); 304 | //Now parse the entry itself, url type is set just before adding entry into list 305 | if (jsonEntry.Attributes.PostFile != null) 306 | { 307 | _logger.Debug($"[{jsonEntry.Id}] Found file data"); 308 | entry.Url = jsonEntry.Attributes.PostFile.Url; 309 | entry.Filename = jsonEntry.Attributes.PostFile.Name; 310 | } 311 | else 312 | { 313 | _logger.Debug($"[{jsonEntry.Id}] No file data, fallback to image data"); 314 | if (jsonEntry.Attributes.Image != null) 315 | { 316 | _logger.Debug($"[{jsonEntry.Id}] Found image data"); 317 | if (jsonEntry.Attributes.Image.LargeUrl != null) 318 | { 319 | _logger.Debug($"[{jsonEntry.Id}] Found large url"); 320 | entry.Url = jsonEntry.Attributes.Image.LargeUrl; 321 | } 322 | else if (jsonEntry.Attributes.Image.Url != null) 323 | { 324 | _logger.Debug($"[{jsonEntry.Id}] Found regular url"); 325 | 
entry.Url = jsonEntry.Attributes.Image.Url; 326 | 327 | } 328 | else 329 | { 330 | _logger.Warn($"[{jsonEntry.Id}] No valid image data found"); 331 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Warning, "No valid image data found", jsonEntry.Id)); 332 | } 333 | } 334 | } 335 | /* the base entry is only added when a url was found; otherwise a warning carrying the post id is raised */ 336 | if (!string.IsNullOrEmpty(entry.Url)) 337 | { 338 | entry.UrlType = PatreonCrawledUrlType.PostFile; 339 | _logger.Info($"[{jsonEntry.Id}] New entry: {entry.Url}"); 340 | crawledUrls.Add(entry); 341 | OnNewCrawledUrl(new NewCrawledUrlEventArgs((CrawledUrl)entry.Clone())); 342 | } 343 | else 344 | { 345 | _logger.Warn($"[{jsonEntry.Id}] Post entry doesn't have download url"); 346 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Warning, "Post entry doesn't have download url", jsonEntry.Id)); 347 | } 348 | } 349 | /* verification pass: every attachment/media entry in "included" should either have produced a crawled url or belong to a skipped post; a response without an "included" array is treated as empty */ 350 | _logger.Debug("Checking if all included entries were added..."); 351 | foreach (var jsonEntry in jsonRoot.Included ?? Enumerable.Empty<Included>()) 352 | { 353 | _logger.Debug($"[{jsonEntry.Id}] Verification: Started"); 354 | if (jsonEntry.Type != "attachment" && jsonEntry.Type != "media") 355 | { 356 | if (jsonEntry.Type != "user" && 357 | jsonEntry.Type != "campaign" && 358 | jsonEntry.Type != "access-rule" && 359 | jsonEntry.Type != "reward" && 360 | jsonEntry.Type != "poll_choice" && 361 | jsonEntry.Type != "poll_response") 362 | { 363 | string msg = $"Verification for {jsonEntry.Id}: Unknown type for \"included\": {jsonEntry.Type}"; 364 | _logger.Error(msg); 365 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Error, msg)); 366 | } 367 | continue; 368 | } 369 | 370 | _logger.Debug($"[{jsonEntry.Id}] Is a {jsonEntry.Type}"); 371 | 372 | if (jsonEntry.Type == "attachment") 373 | { 374 | if (!skippedIncludesList.Any(x => x == jsonEntry.Id) && !crawledUrls.Any(x => x.Url == jsonEntry.Attributes.Url)) 375 | { 376 | string msg = 377 | $"Verification for {jsonEntry.Id}: Parsing verification failure! 
Attachment with this id might not be referenced by any post."; 378 | _logger.Warn(msg); 379 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Warning, msg)); 380 | continue; 381 | } 382 | } 383 | 384 | if (jsonEntry.Type == "media") 385 | { 386 | if (!skippedIncludesList.Any(x=>x == jsonEntry.Id) && !crawledUrls.Any(x => x.Url == jsonEntry.Attributes.DownloadUrl/* || x.DownloadUrl == jsonEntry.Attributes.ImageUrls.Original || x.DownloadUrl == jsonEntry.Attributes.ImageUrls.Default*/)) 387 | { 388 | string msg = 389 | $"Verification for {jsonEntry.Id}: Parsing verification failure! Media with this id might not be referenced by any post."; 390 | _logger.Warn(msg); 391 | OnCrawlerMessage(new CrawlerMessageEventArgs(CrawlerMessageType.Warning, msg)); 392 | continue; 393 | } 394 | } 395 | /* NOTE(review): PostCrawlEnd is raised here with the id of an included attachment/media entry rather than a post id - confirm this is intended */ 396 | _logger.Debug($"[{jsonEntry.Id}] Verification: OK"); 397 | OnPostCrawlEnd(new PostCrawlEventArgs(jsonEntry.Id, true)); 398 | } 399 | 400 | return new ParsingResult {CrawledUrls = crawledUrls, NextPage = jsonRoot.Links?.Next}; 401 | } 402 | /* Raises PostCrawlStart; the handler is copied to a local first so a concurrent unsubscribe cannot null it between check and call. */ 403 | private void OnPostCrawlStart(PostCrawlEventArgs e) 404 | { 405 | EventHandler handler = PostCrawlStart; 406 | handler?.Invoke(this, e); 407 | } 408 | /* Raises PostCrawlEnd (same local-copy pattern). */ 409 | private void OnPostCrawlEnd(PostCrawlEventArgs e) 410 | { 411 | EventHandler handler = PostCrawlEnd; 412 | handler?.Invoke(this, e); 413 | } 414 | /* Raises NewCrawledUrl (same local-copy pattern). */ 415 | private void OnNewCrawledUrl(NewCrawledUrlEventArgs e) 416 | { 417 | EventHandler handler = NewCrawledUrl; 418 | handler?.Invoke(this, e); 419 | } 420 | /* Raises CrawlerMessage (same local-copy pattern). */ 421 | private void OnCrawlerMessage(CrawlerMessageEventArgs e) 422 | { 423 | EventHandler handler = CrawlerMessage; 424 | handler?.Invoke(this, e); 425 | } 426 | } 427 | } 428 | --------------------------------------------------------------------------------