├── .github └── workflows │ ├── release.yml │ └── tests.yml ├── .gitignore ├── LICENSE ├── README.md ├── srcs ├── HttpZipDirectory.cs ├── HttpZipEntry.cs ├── HttpZipStream.cs └── HttpZipStream.csproj └── test ├── HttpZipStream.Test.csproj └── HttpZipStreamTest.cs /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | 7 | jobs: 8 | build: 9 | name: Build Package 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | 14 | - name: Checkout Source Repository 15 | uses: actions/checkout@v2 16 | 17 | - name: Setup .NET Core 18 | uses: actions/setup-dotnet@v1 19 | with: 20 | dotnet-version: 3.1.101 21 | 22 | - name: Dump GitHub context 23 | env: 24 | GITHUB_CONTEXT: ${{ toJson(github) }} 25 | run: echo "$GITHUB_CONTEXT" 26 | 27 | - name: Build and Pack Assemblies 28 | run: dotnet pack ./srcs --configuration Release /p:BuildVersion=${{ github.run_number }} --output ./srcs/bin/Publish 29 | 30 | - name: Store Artifacts 31 | uses: actions/upload-artifact@v1.0.0 32 | with: 33 | name: artifacts 34 | path: ./srcs/bin/Publish 35 | 36 | deploy: 37 | needs: [build] 38 | name: Publish Package 39 | runs-on: ubuntu-latest 40 | 41 | steps: 42 | 43 | - name: Setup .NET Core 44 | uses: actions/setup-dotnet@v1 45 | with: 46 | dotnet-version: 3.1.101 47 | 48 | - name: Retrieve Artifacts 49 | uses: actions/download-artifact@v1.0.0 50 | with: 51 | name: artifacts 52 | 53 | - name: Publish Artifact to NuGet 54 | run: dotnet nuget push artifacts/*.nupkg --api-key ${{ secrets.NUGET_TOKEN_FOR_GITHUB }} --source https://nuget.org --skip-duplicate --no-symbols true 55 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | test: 8 | name: Execute Tests 9 | runs-on: ubuntu-latest 
10 | 11 | steps: 12 | 13 | - name: Checkout Source Repository 14 | uses: actions/checkout@v2 15 | 16 | - name: Setup .NET Core 17 | uses: actions/setup-dotnet@v1 18 | with: 19 | dotnet-version: 3.1.101 20 | 21 | - name: Execute Tests 22 | run: dotnet test ./test 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | x64/ 21 | x86/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | [Ll]og/ 26 | 27 | # Visual Studio 2015/2017 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # Visual Studio 2017 auto generated files 33 | Generated\ Files/ 34 | 35 | # MSTest test Results 36 | [Tt]est[Rr]esult*/ 37 | [Bb]uild[Ll]og.* 38 | 39 | # NUNIT 40 | *.VisualState.xml 41 | TestResult.xml 42 | 43 | # Build Results of an ATL Project 44 | [Dd]ebugPS/ 45 | [Rr]eleasePS/ 46 | dlldata.c 47 | 48 | # Benchmark Results 49 | BenchmarkDotNet.Artifacts/ 50 | 51 | # .NET Core 52 | project.lock.json 53 | project.fragment.lock.json 54 | artifacts/ 55 | **/Properties/launchSettings.json 56 | 57 | # StyleCop 58 | StyleCopReport.xml 59 | 60 | # Files built by Visual Studio 61 | *_i.c 62 | *_p.c 63 | *_i.h 64 | *.ilk 65 | *.meta 66 | *.obj 67 | *.iobj 68 | *.pch 69 | *.pdb 70 | *.ipdb 71 | *.pgc 72 | *.pgd 73 | *.rsp 74 | *.sbr 75 | *.tlb 76 | *.tli 77 | *.tlh 78 | *.tmp 79 | *.tmp_proj 80 | *.log 81 | 
*.vspscc 82 | *.vssscc 83 | .builds 84 | *.pidb 85 | *.svclog 86 | *.scc 87 | 88 | # Chutzpah Test files 89 | _Chutzpah* 90 | 91 | # Visual C++ cache files 92 | ipch/ 93 | *.aps 94 | *.ncb 95 | *.opendb 96 | *.opensdf 97 | *.sdf 98 | *.cachefile 99 | *.VC.db 100 | *.VC.VC.opendb 101 | 102 | # Visual Studio profiler 103 | *.psess 104 | *.vsp 105 | *.vspx 106 | *.sap 107 | 108 | # Visual Studio Trace Files 109 | *.e2e 110 | 111 | # TFS 2012 Local Workspace 112 | $tf/ 113 | 114 | # Guidance Automation Toolkit 115 | *.gpState 116 | 117 | # ReSharper is a .NET coding add-in 118 | _ReSharper*/ 119 | *.[Rr]e[Ss]harper 120 | *.DotSettings.user 121 | 122 | # JustCode is a .NET coding add-in 123 | .JustCode 124 | 125 | # TeamCity is a build add-in 126 | _TeamCity* 127 | 128 | # DotCover is a Code Coverage Tool 129 | *.dotCover 130 | 131 | # AxoCover is a Code Coverage Tool 132 | .axoCover/* 133 | !.axoCover/settings.json 134 | 135 | # Visual Studio code coverage results 136 | *.coverage 137 | *.coveragexml 138 | 139 | # NCrunch 140 | _NCrunch_* 141 | .*crunch*.local.xml 142 | nCrunchTemp_* 143 | 144 | # MightyMoose 145 | *.mm.* 146 | AutoTest.Net/ 147 | 148 | # Web workbench (sass) 149 | .sass-cache/ 150 | 151 | # Installshield output folder 152 | [Ee]xpress/ 153 | 154 | # DocProject is a documentation generator add-in 155 | DocProject/buildhelp/ 156 | DocProject/Help/*.HxT 157 | DocProject/Help/*.HxC 158 | DocProject/Help/*.hhc 159 | DocProject/Help/*.hhk 160 | DocProject/Help/*.hhp 161 | DocProject/Help/Html2 162 | DocProject/Help/html 163 | 164 | # Click-Once directory 165 | publish/ 166 | 167 | # Publish Web Output 168 | *.[Pp]ublish.xml 169 | *.azurePubxml 170 | # Note: Comment the next line if you want to checkin your web deploy settings, 171 | # but database connection strings (with potential passwords) will be unencrypted 172 | *.pubxml 173 | *.publishproj 174 | 175 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 176 | # checkin your Azure Web App publish settings, but sensitive information contained 177 | # in these scripts will be unencrypted 178 | PublishScripts/ 179 | 180 | # NuGet Packages 181 | *.nupkg 182 | # The packages folder can be ignored because of Package Restore 183 | **/[Pp]ackages/* 184 | # except build/, which is used as an MSBuild target. 185 | !**/[Pp]ackages/build/ 186 | # Uncomment if necessary however generally it will be regenerated when needed 187 | #!**/[Pp]ackages/repositories.config 188 | # NuGet v3's project.json files produces more ignorable files 189 | *.nuget.props 190 | *.nuget.targets 191 | 192 | # Microsoft Azure Build Output 193 | csx/ 194 | *.build.csdef 195 | 196 | # Microsoft Azure Emulator 197 | ecf/ 198 | rcf/ 199 | 200 | # Windows Store app package directories and files 201 | AppPackages/ 202 | BundleArtifacts/ 203 | Package.StoreAssociation.xml 204 | _pkginfo.txt 205 | *.appx 206 | 207 | # Visual Studio cache files 208 | # files ending in .cache can be ignored 209 | *.[Cc]ache 210 | # but keep track of directories ending in .cache 211 | !*.[Cc]ache/ 212 | 213 | # Others 214 | ClientBin/ 215 | ~$* 216 | *~ 217 | *.dbmdl 218 | *.dbproj.schemaview 219 | *.jfm 220 | *.pfx 221 | *.publishsettings 222 | orleans.codegen.cs 223 | 224 | # Including strong name files can present a security risk 225 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 226 | #*.snk 227 | 228 | # Since there are multiple workflows, uncomment next line to ignore bower_components 229 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 230 | #bower_components/ 231 | 232 | # RIA/Silverlight projects 233 | Generated_Code/ 234 | 235 | # Backup & report files from converting an old project file 236 | # to a newer Visual Studio version. 
Backup files are not needed, 237 | # because we have git ;-) 238 | _UpgradeReport_Files/ 239 | Backup*/ 240 | UpgradeLog*.XML 241 | UpgradeLog*.htm 242 | ServiceFabricBackup/ 243 | *.rptproj.bak 244 | 245 | # SQL Server files 246 | *.mdf 247 | *.ldf 248 | *.ndf 249 | 250 | # Business Intelligence projects 251 | *.rdl.data 252 | *.bim.layout 253 | *.bim_*.settings 254 | *.rptproj.rsuser 255 | 256 | # Microsoft Fakes 257 | FakesAssemblies/ 258 | 259 | # GhostDoc plugin setting file 260 | *.GhostDoc.xml 261 | 262 | # Node.js Tools for Visual Studio 263 | .ntvs_analysis.dat 264 | node_modules/ 265 | 266 | # Visual Studio 6 build log 267 | *.plg 268 | 269 | # Visual Studio 6 workspace options file 270 | *.opt 271 | 272 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 273 | *.vbw 274 | 275 | # Visual Studio LightSwitch build output 276 | **/*.HTMLClient/GeneratedArtifacts 277 | **/*.DesktopClient/GeneratedArtifacts 278 | **/*.DesktopClient/ModelManifest.xml 279 | **/*.Server/GeneratedArtifacts 280 | **/*.Server/ModelManifest.xml 281 | _Pvt_Extensions 282 | 283 | # Paket dependency manager 284 | .paket/paket.exe 285 | paket-files/ 286 | 287 | # FAKE - F# Make 288 | .fake/ 289 | 290 | # JetBrains Rider 291 | .idea/ 292 | *.sln.iml 293 | 294 | # CodeRush 295 | .cr/ 296 | 297 | # Python Tools for Visual Studio (PTVS) 298 | __pycache__/ 299 | *.pyc 300 | 301 | # Cake - Uncomment if you are using it 302 | # tools/** 303 | # !tools/packages.config 304 | 305 | # Tabs Studio 306 | *.tss 307 | 308 | # Telerik's JustMock configuration file 309 | *.jmconfig 310 | 311 | # BizTalk build output 312 | *.btp.cs 313 | *.btm.cs 314 | *.odx.cs 315 | *.xsd.cs 316 | 317 | # OpenCover UI analysis results 318 | OpenCover/ 319 | 320 | # Azure Stream Analytics local run output 321 | ASALocalRun/ 322 | 323 | # MSBuild Binary and Structured Log 324 | *.binlog 325 | 326 | # NVidia Nsight GPU debugger configuration file 327 | *.nvuser 328 | 329 | # MFractors 
(Xamarin productivity tool) working folder 330 | .mfractor/ 331 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Leverson Carlos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HttpZipStream 2 | A simple library to extract specific entries from a remote http zip archive without the need to download the entire file. 3 | ![Release](https://github.com/LeversonCarlos/HttpZipStream/workflows/Release/badge.svg) 4 | 5 | ## Understanding the magic 6 | When opening a zip archive using a remote url, the zip library will need to download the entire file to be able to read its contents. 
So if you had a 90 MB zip file and wanted only a 100 KB file from within it, you would end up downloading the entire 90 MB anyway. 7 | The [zip format](https://en.wikipedia.org/wiki/Zip_(file_format)) defines a directory pointing to all its inner entries, containing properties like names, starting offsets, sizes, and so on. This directory is pretty small: just a few bytes placed at the very end of the archive. So, if we could read just this directory, we would know where in the zip archive the file we want is stored. 8 | And if we could request just that part of the content from the remote url, we would get a smaller download, with just what we want and need. 9 | It turns out that the http protocol supports a technique called [byte serving](https://en.wikipedia.org/wiki/Byte_serving), which lets us set header parameters on the http request specifying the byte ranges we want for that request. 10 | With that in mind, what we do is pretty simple. We make a first http request asking just for the http headers (not the content), and from that we know the content size. Then we make small range requests at the end of the file, extracting all the directory info. Then, for the entries we want, we make requests for just those ranges. Apply the deflate algorithm and it's done. 11 | With this approach, we end up doing more http requests, so it's only worth using if the desired content represents a small part of the entire zip archive. 12 | More on this can be found in my [medium](https://medium.com/@lcjohnny/httpzipstream-extracting-single-entry-from-remote-zip-without-downloading-the-entire-file-7a0f3d24a6fc) article.
13 | 14 | ## Install instructions 15 | You can add the library to your project using the [nuget](https://www.nuget.org/packages/HttpZipStream) package: 16 | ``` 17 | dotnet add package HttpZipStream 18 | ``` 19 | 20 | ## Sample of how to use the library 21 | Extracting just the first entry from a remote zip archive: 22 | ```csharp 23 | var httpUrl = "http://MyRemoteFile.zip"; 24 | using (var zipStream = new System.IO.Compression.HttpZipStream(httpUrl)) 25 | { 26 | var entryList = await zipStream.GetEntriesAsync(); 27 | var entry = entryList.FirstOrDefault(); 28 | byte[] entryContent = await zipStream.ExtractAsync(entry); 29 | /* do what you want with the entry content */ 30 | } 31 | ``` 32 | 33 | ## Built using 34 | * [DotNET Core](https://dotnet.github.io) 35 | * [xUnit](https://xunit.github.io) 36 | * [vsCode](https://github.com/Microsoft/vscode) 37 | * [ZipFormat](https://en.wikipedia.org/wiki/Zip_(file_format)) 38 | 39 | ## Changelog 40 | ### v0.1.* 41 | - Some minor documentation adjustments. 42 | - Proper naming convention for async methods. 43 | - Preparing projects to be built, packed and deployed by the server. 44 | ### v0.2.* 45 | - Implementing an ExtractAsync overload that returns just the entry content byte array. 46 | - BUG #13: Some entries are not deflated correctly.
47 | ### v0.3.* 48 | - Upgrading dotnet version to 3.1 49 | 50 | 51 | ## Authors 52 | * [Leverson Carlos](https://github.com/LeversonCarlos) 53 | 54 | ## License 55 | MIT License - see the [LICENSE](LICENSE) file for details 56 | -------------------------------------------------------------------------------- /srcs/HttpZipDirectory.cs: --------------------------------------------------------------------------------
namespace System.IO.Compression
{
    /// <summary>
    /// Values read from the zip "end of central directory" (EOCD) record.
    /// </summary>
    /// <remarks>
    /// The fields are stored wider than their on-disk size because the zip format
    /// defines them as unsigned (16-bit entry count, 32-bit size and offset).
    /// Zip64 records are not interpreted.
    /// </remarks>
    internal class HttpZipDirectory
    {
        /// <summary>Absolute offset of the central directory inside the archive; -1 while not located.</summary>
        public long Offset { get; set; }

        /// <summary>Size of the central directory, in bytes.</summary>
        public long Size { get; set; }

        /// <summary>Number of entries recorded in the central directory.</summary>
        public int Entries { get; set; }
    }
}
-------------------------------------------------------------------------------- /srcs/HttpZipEntry.cs: --------------------------------------------------------------------------------
namespace System.IO.Compression
{
    /// <summary>
    /// One entry of the remote archive, populated from its central directory file header.
    /// </summary>
    public class HttpZipEntry
    {

        internal HttpZipEntry(int index)
        {
            this.Index = index;
        }

        /// <summary>Zero-based position of the entry inside the central directory.</summary>
        public int Index { get; }

        internal int Signature { get; set; }
        internal short VersionMadeBy { get; set; }
        internal short MinimumVersionNeededToExtract { get; set; }
        internal short GeneralPurposeBitFlag { get; set; }

        /// <summary>Compression method code; the extractor handles 0 (stored) and 8 (deflate).</summary>
        public short CompressionMethod { get; internal set; }

        /// <summary>Raw four bytes read at header offset 12 (the modification time and date fields of the zip format).</summary>
        public int FileLastModification { get; internal set; }

        /// <summary>CRC-32 value recorded in the header (it is not verified by the extractor).</summary>
        public int CRC32 { get; internal set; }

        /// <summary>Number of bytes the entry data occupies inside the archive.</summary>
        public int CompressedSize { get; internal set; }

        /// <summary>Number of bytes of the entry once extracted.</summary>
        public int UncompressedSize { get; internal set; }

        // The three length fields are unsigned 16-bit values on disk, hence int here.
        internal int FileNameLength { get; set; }
        internal int ExtraFieldLength { get; set; }
        internal int FileCommentLength { get; set; }

        internal short DiskNumberWhereFileStarts { get; set; }
        internal short InternalFileAttributes { get; set; }
        internal int ExternalFileAttributes { get; set; }

        // Unsigned 32-bit on disk: absolute offset of the entry's local file header.
        internal long FileOffset { get; set; }

        /// <summary>Entry name (path inside the archive).</summary>
        public string FileName { get; internal set; }

        /// <summary>Extra field bytes decoded as text.</summary>
        public string ExtraField { get; internal set; }

        /// <summary>Entry comment.</summary>
        public string FileComment { get; internal set; }
    }
}
-------------------------------------------------------------------------------- /srcs/HttpZipStream.cs: --------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

namespace System.IO.Compression
{
    /// <summary>
    /// Reads the directory and individual entries of a remote zip archive using
    /// HTTP range requests, so the whole archive never has to be downloaded.
    /// </summary>
    /// <remarks>
    /// Multi-byte fields are decoded with <see cref="BitConverter"/>, which follows the
    /// machine byte order; the zip format is little-endian.
    /// </remarks>
    public class HttpZipStream : IDisposable
    {

        // Sizes and signatures defined by the zip file format.
        private const int EndOfCentralDirectorySize = 22;
        private const int CentralHeaderFixedSize = 46;
        private const int LocalHeaderFixedSize = 30;
        private const int CentralHeaderSignature = 0x02014b50;
        private const int LocalHeaderSignature = 0x04034b50;
        private const int Utf8TextFlag = 0x0800; // general purpose bit 11

        private readonly string httpUrl;
        private readonly bool leaveHttpClientOpen;
        private HttpClient httpClient;
        private HttpZipDirectory directoryData;

        /// <summary>
        /// Creates a stream over <paramref name="httpUrl"/> using a private
        /// <see cref="HttpClient"/> that is disposed together with this instance.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="httpUrl"/> is null.</exception>
        public HttpZipStream(string httpUrl) : this(httpUrl, new HttpClient(), false) { }

        /// <summary>
        /// Creates a stream over <paramref name="httpUrl"/> using a caller-supplied client.
        /// The client stays owned by the caller: it is neither reconfigured nor disposed here.
        /// </summary>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public HttpZipStream(string httpUrl, HttpClient httpClient) : this(httpUrl, httpClient, true) { }

        private HttpZipStream(string httpUrl, HttpClient httpClient, bool leaveHttpClientOpen)
        {
            this.httpUrl = httpUrl ?? throw new ArgumentNullException(nameof(httpUrl));
            this.httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
            this.leaveHttpClientOpen = leaveHttpClientOpen;
        }

        // Fails with a meaningful exception (instead of a NullReferenceException) after Dispose.
        private HttpClient Client => this.httpClient ?? throw new ObjectDisposedException(nameof(HttpZipStream));


        /// <summary>Total size of the remote archive in bytes, or -1 while unknown.</summary>
        public long ContentLength { get; private set; } = -1;

        /// <summary>
        /// Manually setting the content length is only recommended if you truly know what you are doing.
        /// It saves the initial request, but a wrong value invalidates every following range request.
        /// </summary>
        public void SetContentLength(long value) { this.ContentLength = value; }

        /// <summary>
        /// Returns the archive size, asking the server for the response headers the first time.
        /// </summary>
        /// <returns>
        /// The content length, or -1 when the server answers with a non-success status
        /// or does not report a Content-Length header.
        /// </returns>
        public async Task<long> GetContentLengthAsync()
        {
            if (this.ContentLength != -1) { return this.ContentLength; }

            using (var request = this.CreateRequest())
            using (var response = await this.Client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead).ConfigureAwait(false))
            {
                if (!response.IsSuccessStatusCode) { return -1; }

                // The typed header avoids the exception GetValues throws when the header is absent.
                var contentLength = response.Content?.Headers.ContentLength;
                if (!contentLength.HasValue) { return -1; }

                this.ContentLength = contentLength.Value;
                return this.ContentLength;
            }
        }


        // Builds a GET request carrying the Accept header on the request itself,
        // so a shared HttpClient's DefaultRequestHeaders are never modified.
        private HttpRequestMessage CreateRequest()
        {
            var request = new HttpRequestMessage(HttpMethod.Get, this.httpUrl);
            request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/octet-stream"));
            return request;
        }

        // Downloads the bytes from..to (both inclusive, as HTTP ranges are).
        // Throws HttpRequestException on a non-success status.
        private async Task<byte[]> GetRangeAsync(long from, long to)
        {
            using (var request = this.CreateRequest())
            {
                request.Headers.Range = new RangeHeaderValue(from, to);
                using (var response = await this.Client.SendAsync(request).ConfigureAwait(false))
                {
                    response.EnsureSuccessStatusCode();
                    var content = await response.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
                    if (response.StatusCode == HttpStatusCode.PartialContent) { return content; }

                    // A plain 200 means the server ignored the Range header and sent the
                    // whole resource: cut the requested window out of it.
                    if (from >= content.LongLength) { return Array.Empty<byte>(); }
                    var length = (int)Math.Min(to - from + 1, content.LongLength - from);
                    var slice = new byte[length];
                    Array.Copy(content, from, slice, 0, length);
                    return slice;
                }
            }
        }


        // Searches the tail of the archive for the end of central directory record
        // and stores its values in directoryData. Returns false when it is not found.
        private async Task<bool> LocateDirectoryAsync()
        {
            const int chunkSize = 256;
            const int maximumTries = 4;

            this.directoryData = new HttpZipDirectory { Offset = -1 };
            if (this.ContentLength < EndOfCentralDirectorySize) { return false; }

            var rangeFinish = this.ContentLength - 1; // last valid byte index
            var rangeStart = this.ContentLength - EndOfCentralDirectorySize;

            // Try up to four times, each one reaching further back from the end of the file.
            for (var tries = 1; tries <= maximumTries; tries++)
            {
                rangeStart = Math.Max(0, rangeStart - (chunkSize * tries));
                var byteArray = await this.GetRangeAsync(rangeStart, rangeFinish).ConfigureAwait(false);

                // Scan backwards for the record signature: 50 4B 05 06
                // https://en.wikipedia.org/wiki/Zip_(file_format)#End_of_central_directory_record_(EOCD)
                for (var pos = byteArray.Length - EndOfCentralDirectorySize; pos >= 0; pos--)
                {
                    if (byteArray[pos + 0] == 0x50 &&
                        byteArray[pos + 1] == 0x4b &&
                        byteArray[pos + 2] == 0x05 &&
                        byteArray[pos + 3] == 0x06)
                    {
                        this.directoryData.Entries = BitConverter.ToUInt16(byteArray, pos + 10);
                        this.directoryData.Size = BitConverter.ToUInt32(byteArray, pos + 12);
                        this.directoryData.Offset = BitConverter.ToUInt32(byteArray, pos + 16);
                        return true;
                    }
                }

                // The whole file has been scanned already: further tries would repeat the same request.
                if (rangeStart == 0) { break; }
            }

            return false;
        }


        /// <summary>
        /// Downloads the central directory and returns one <see cref="HttpZipEntry"/> per archive entry.
        /// </summary>
        /// <returns>
        /// The entry list, or null when the content length cannot be determined
        /// or the end of central directory record cannot be located.
        /// </returns>
        /// <exception cref="InvalidDataException">The central directory is truncated or malformed.</exception>
        public async Task<List<HttpZipEntry>> GetEntriesAsync()
        {
            if (await this.GetContentLengthAsync().ConfigureAwait(false) == -1) { return null; }
            if (!await this.LocateDirectoryAsync().ConfigureAwait(false)) { return null; }

            var directory = this.directoryData;
            var entryList = new List<HttpZipEntry>(directory.Entries);
            if (directory.Entries == 0) { return entryList; }
            if (directory.Size == 0) { throw new InvalidDataException("The central directory is empty but entries were announced"); }

            // Range ends are inclusive, hence the -1.
            var byteArray = await this.GetRangeAsync(directory.Offset, directory.Offset + directory.Size - 1).ConfigureAwait(false);

            var entriesOffset = 0;
            for (var entryIndex = 0; entryIndex < directory.Entries; entryIndex++)
            {
                // https://en.wikipedia.org/wiki/Zip_(file_format)#Central_directory_file_header
                if (entriesOffset + CentralHeaderFixedSize > byteArray.Length)
                { throw new InvalidDataException("The central directory is truncated"); }

                var entry = new HttpZipEntry(entryIndex);

                entry.Signature = BitConverter.ToInt32(byteArray, entriesOffset + 0); // 0x02014b50
                if (entry.Signature != CentralHeaderSignature)
                { throw new InvalidDataException($"Invalid central directory header signature on entry [{entryIndex}]"); }
                entry.VersionMadeBy = BitConverter.ToInt16(byteArray, entriesOffset + 4);
                entry.MinimumVersionNeededToExtract = BitConverter.ToInt16(byteArray, entriesOffset + 6);
                entry.GeneralPurposeBitFlag = BitConverter.ToInt16(byteArray, entriesOffset + 8);

                entry.CompressionMethod = BitConverter.ToInt16(byteArray, entriesOffset + 10);
                entry.FileLastModification = BitConverter.ToInt32(byteArray, entriesOffset + 12);
                entry.CRC32 = BitConverter.ToInt32(byteArray, entriesOffset + 16);
                entry.CompressedSize = BitConverter.ToInt32(byteArray, entriesOffset + 20);
                entry.UncompressedSize = BitConverter.ToInt32(byteArray, entriesOffset + 24);

                entry.FileNameLength = BitConverter.ToUInt16(byteArray, entriesOffset + 28); // (n)
                entry.ExtraFieldLength = BitConverter.ToUInt16(byteArray, entriesOffset + 30); // (m)
                entry.FileCommentLength = BitConverter.ToUInt16(byteArray, entriesOffset + 32); // (k)

                entry.DiskNumberWhereFileStarts = BitConverter.ToInt16(byteArray, entriesOffset + 34);
                entry.InternalFileAttributes = BitConverter.ToInt16(byteArray, entriesOffset + 36);
                entry.ExternalFileAttributes = BitConverter.ToInt32(byteArray, entriesOffset + 38);
                entry.FileOffset = BitConverter.ToUInt32(byteArray, entriesOffset + 42);

                // Variable-length tail: name (n), extra field (m), comment (k).
                var fileNameStart = entriesOffset + CentralHeaderFixedSize;
                var extraFieldStart = fileNameStart + entry.FileNameLength;
                var fileCommentStart = extraFieldStart + entry.ExtraFieldLength;
                var nextEntryOffset = fileCommentStart + entry.FileCommentLength;
                if (nextEntryOffset > byteArray.Length)
                { throw new InvalidDataException("The central directory is truncated"); }

                // General purpose bit 11 states that name and comment are UTF-8 encoded.
                var textEncoding = (entry.GeneralPurposeBitFlag & Utf8TextFlag) != 0 ? Encoding.UTF8 : Encoding.Default;
                entry.FileName = textEncoding.GetString(byteArray, fileNameStart, entry.FileNameLength);
                entry.ExtraField = Encoding.Default.GetString(byteArray, extraFieldStart, entry.ExtraFieldLength);
                entry.FileComment = textEncoding.GetString(byteArray, fileCommentStart, entry.FileCommentLength);

                entryList.Add(entry);
                entriesOffset = nextEntryOffset;
            }

            return entryList;
        }


        /// <summary>
        /// Extracts every entry of <paramref name="entryList"/>, one after the other,
        /// handing each content to <paramref name="resultCallback"/>.
        /// </summary>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        [Obsolete("Iterate the entries and call ExtractAsync(HttpZipEntry, Action<MemoryStream>) for each one instead.")]
        public async Task ExtractAsync(List<HttpZipEntry> entryList, Action<MemoryStream> resultCallback)
        {
            if (entryList == null) { throw new ArgumentNullException(nameof(entryList)); }

            foreach (var entry in entryList)
            { await this.ExtractAsync(entry, resultCallback).ConfigureAwait(false); }
        }

        /// <summary>
        /// Extracts <paramref name="entry"/> and passes its content, wrapped in a
        /// <see cref="MemoryStream"/> positioned at the start, to <paramref name="resultCallback"/>.
        /// The stream is not disposed here; the callback may keep it.
        /// </summary>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public async Task ExtractAsync(HttpZipEntry entry, Action<MemoryStream> resultCallback)
        {
            if (resultCallback == null) { throw new ArgumentNullException(nameof(resultCallback)); }

            var fileDataBuffer = await this.ExtractAsync(entry).ConfigureAwait(false);
            resultCallback.Invoke(new MemoryStream(fileDataBuffer));
        }

        /// <summary>
        /// Downloads and decompresses a single entry.
        /// </summary>
        /// <param name="entry">An entry returned by <see cref="GetEntriesAsync"/>.</param>
        /// <returns>The uncompressed content of the entry.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="entry"/> is null.</exception>
        /// <exception cref="NotSupportedException">
        /// The compression method is neither stored (0) nor deflate (8), or a size does not fit in an array.
        /// </exception>
        /// <exception cref="InvalidDataException">The local header or the compressed data is malformed or truncated.</exception>
        public async Task<byte[]> ExtractAsync(HttpZipEntry entry)
        {
            if (entry == null) { throw new ArgumentNullException(nameof(entry)); }

            // Reject what cannot be handled before spending a download on it.
            if (entry.CompressionMethod != 0 && entry.CompressionMethod != 8)
            { throw new NotSupportedException($"The compression method [{entry.CompressionMethod}] is not supported"); }
            if (entry.CompressedSize < 0 || entry.UncompressedSize < 0)
            { throw new NotSupportedException("Entries of 2 gigabytes or more are not supported"); }

            // One request for local header + data. The header length is an estimate:
            // the local extra field may differ in size from the central directory one.
            var estimatedHeaderLength = LocalHeaderFixedSize + entry.FileNameLength + entry.ExtraFieldLength;
            var rangeStart = entry.FileOffset;
            var rangeFinish = rangeStart + estimatedHeaderLength + entry.CompressedSize - 1;
            var byteArray = await this.GetRangeAsync(rangeStart, rangeFinish).ConfigureAwait(false);

            // Locate the data bounds using the lengths stored in the local header itself.
            // https://en.wikipedia.org/wiki/Zip_(file_format)#Local_file_header
            if (byteArray.Length < LocalHeaderFixedSize || BitConverter.ToInt32(byteArray, 0) != LocalHeaderSignature)
            { throw new InvalidDataException($"Invalid local file header for entry [{entry.FileName}]"); }
            var fileNameLength = BitConverter.ToUInt16(byteArray, 26); // (n)
            var extraFieldLength = BitConverter.ToUInt16(byteArray, 28); // (m)
            var fileDataOffset = LocalHeaderFixedSize + fileNameLength + extraFieldLength;

            // Sizes come from the central directory: the local ones are zero when a data descriptor is used.
            var fileDataSize = entry.CompressedSize;

            var fileDataBuffer = new byte[fileDataSize];
            if (fileDataSize > 0)
            {
                if ((long)fileDataOffset + fileDataSize <= byteArray.Length)
                {
                    Array.Copy(byteArray, fileDataOffset, fileDataBuffer, 0, fileDataSize);
                }
                else
                {
                    // The estimate was too short: fetch exactly the data range.
                    var dataStart = entry.FileOffset + fileDataOffset;
                    fileDataBuffer = await this.GetRangeAsync(dataStart, dataStart + fileDataSize - 1).ConfigureAwait(false);
                    if (fileDataBuffer.Length < fileDataSize)
                    { throw new InvalidDataException($"The data of entry [{entry.FileName}] is truncated"); }
                }
            }

            /* STORED */
            if (entry.CompressionMethod == 0)
            { return fileDataBuffer; }

            /* DEFLATED */
            var inflatedArray = new byte[entry.UncompressedSize];
            using (var compressedStream = new MemoryStream(fileDataBuffer, false))
            using (var deflateStream = new DeflateStream(compressedStream, CompressionMode.Decompress))
            {
                // A single Read may legitimately return fewer bytes than requested, so loop until full.
                var totalRead = 0;
                while (totalRead < inflatedArray.Length)
                {
                    var bytesRead = await deflateStream
                        .ReadAsync(inflatedArray, totalRead, inflatedArray.Length - totalRead)
                        .ConfigureAwait(false);
                    if (bytesRead == 0)
                    { throw new InvalidDataException($"The compressed data of entry [{entry.FileName}] ended prematurely"); }
                    totalRead += bytesRead;
                }
            }
            return inflatedArray;
        }


        /// <summary>
        /// Releases the internal <see cref="HttpClient"/> when it was created by this instance
        /// and resets the cached archive information. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            if (!this.leaveHttpClientOpen)
            {
                this.httpClient?.Dispose();
                this.httpClient = null;
            }
            this.directoryData = null;
            this.ContentLength = -1;
        }


    }
}
-------------------------------------------------------------------------------- /srcs/HttpZipStream.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | Debug;Release 6 | 7 | 8 | 9 | HttpZipStream 10 | 0 11 | 0.4.$(BuildVersion) 12 | A simple library to extract specific entries from a remote http zip archive without the need to download the entire file 13 | Leverson
Carlos;lcjohnny 14 | Leverson Carlos 15 | https://github.com/LeversonCarlos/HttpZipStream 16 | en 17 | httpzip remotezip zipstream httpzipstream http zip 18 | MIT 19 | false 20 | snupkg 21 | 22 | 23 | 24 | full 25 | true 26 | 27 | 28 | 29 | pdbonly 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /test/HttpZipStream.Test.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netcoreapp3.1 5 | 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | all 14 | runtime; build; native; contentfiles; analyzers; buildtransitive 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /test/HttpZipStreamTest.cs: --------------------------------------------------------------------------------
using System;
using System.Linq;
using System.Threading.Tasks;
using Xunit;

namespace System.IO.Compression
{
    /// <summary>
    /// Integration tests that run against a sample archive reachable through
    /// <see cref="httpUrl"/>; they require network access to that address.
    /// </summary>
    public class HttpZipStreamTest
    {
        private readonly string httpUrl = "https://onedrive.live.com/download.aspx?cid=ADED24162E9A6538&authKey=%21ABqNji2NaV0MT58&resid=ADED24162E9A6538%21107&ithint=%2Ecbz";


        // The tests return Task (not void) so the runner awaits them and observes their exceptions.
        [Fact]
        public async Task ExampleStream_ContentLength_MustBe_9702kbytes()
        {
            using (var streamZip = new HttpZipStream(httpUrl))
            {
                var contentLength = await streamZip.GetContentLengthAsync();

                Assert.Equal(9935427, contentLength);
            }
        }


        [Fact]
        public async Task ExampleStream_Entries_MustHave_36items()
        {
            using (var streamZip = new HttpZipStream(httpUrl))
            {
                // GetEntriesAsync resolves the content length by itself.
                var entryList = await streamZip.GetEntriesAsync();

                Assert.NotNull(entryList);
                Assert.Equal(36, entryList.Count);
            }
        }


        [Fact]
        public async Task ExampleStream_LargerEntry_MustBe_0001_With_347kbytes()
        {
            using (var streamZip = new HttpZipStream(httpUrl))
            {
                var entryList = await streamZip.GetEntriesAsync();
                Assert.NotNull(entryList);

                var largerEntry = entryList
                    .OrderByDescending(x => x.CompressedSize)
                    .First();

                Assert.Equal("Blue Beetle [1967] #01 - 0001.jpg", largerEntry.FileName);
                Assert.Equal(355736, largerEntry.CompressedSize);
            }
        }


        [Fact]
        public async Task ExampleStream_SmallerEntryExtraction_MustResult_MemoryStream_With_227kbytes()
        {
            using (var streamZip = new HttpZipStream(httpUrl))
            {
                var entryList = await streamZip.GetEntriesAsync();
                Assert.NotNull(entryList);

                var smallerEntry = entryList
                    .OrderBy(x => x.CompressedSize)
                    .First();

                long memoryStreamLength = 0;
                await streamZip.ExtractAsync(smallerEntry, (MemoryStream memoryStream) =>
                {
                    memoryStreamLength = memoryStream.Length;
                });

                Assert.Equal(232723, memoryStreamLength);
            }
        }


    }
}
--------------------------------------------------------------------------------