├── .github
└── workflows
│ ├── release.yml
│ └── tests.yml
├── .gitignore
├── LICENSE
├── README.md
├── srcs
├── HttpZipDirectory.cs
├── HttpZipEntry.cs
├── HttpZipStream.cs
└── HttpZipStream.csproj
└── test
├── HttpZipStream.Test.csproj
└── HttpZipStreamTest.cs
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 |
7 | jobs:
8 | build:
9 | name: Build Package
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 |
14 | - name: Checkout Source Repository
15 | uses: actions/checkout@v2
16 |
17 | - name: Setup .NET Core
18 | uses: actions/setup-dotnet@v1
19 | with:
20 | dotnet-version: 3.1.101
21 |
22 | - name: Dump GitHub context
23 | env:
24 | GITHUB_CONTEXT: ${{ toJson(github) }}
25 | run: echo "$GITHUB_CONTEXT"
26 |
27 | - name: Build and Pack Assemblies
28 | run: dotnet pack ./srcs --configuration Release /p:BuildVersion=${{ github.run_number }} --output ./srcs/bin/Publish
29 |
30 | - name: Store Artifacts
31 | uses: actions/upload-artifact@v1.0.0
32 | with:
33 | name: artifacts
34 | path: ./srcs/bin/Publish
35 |
36 | deploy:
37 | needs: [build]
38 | name: Publish Package
39 | runs-on: ubuntu-latest
40 |
41 | steps:
42 |
43 | - name: Setup .NET Core
44 | uses: actions/setup-dotnet@v1
45 | with:
46 | dotnet-version: 3.1.101
47 |
48 | - name: Retrieve Artifacts
49 | uses: actions/download-artifact@v1.0.0
50 | with:
51 | name: artifacts
52 |
53 | - name: Publish Artifact to NuGet
54 | run: dotnet nuget push artifacts/*.nupkg --api-key ${{ secrets.NUGET_TOKEN_FOR_GITHUB }} --source https://nuget.org --skip-duplicate --no-symbols true
55 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | pull_request:
5 |
6 | jobs:
7 | test:
8 | name: Execute Tests
9 | runs-on: ubuntu-latest
10 |
11 | steps:
12 |
13 | - name: Checkout Source Repository
14 | uses: actions/checkout@v2
15 |
16 | - name: Setup .NET Core
17 | uses: actions/setup-dotnet@v1
18 | with:
19 | dotnet-version: 3.1.101
20 |
21 | - name: Execute Tests
22 | run: dotnet test ./test
23 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.suo
8 | *.user
9 | *.userosscache
10 | *.sln.docstates
11 |
12 | # User-specific files (MonoDevelop/Xamarin Studio)
13 | *.userprefs
14 |
15 | # Build results
16 | [Dd]ebug/
17 | [Dd]ebugPublic/
18 | [Rr]elease/
19 | [Rr]eleases/
20 | x64/
21 | x86/
22 | bld/
23 | [Bb]in/
24 | [Oo]bj/
25 | [Ll]og/
26 |
27 | # Visual Studio 2015/2017 cache/options directory
28 | .vs/
29 | # Uncomment if you have tasks that create the project's static files in wwwroot
30 | #wwwroot/
31 |
32 | # Visual Studio 2017 auto generated files
33 | Generated\ Files/
34 |
35 | # MSTest test Results
36 | [Tt]est[Rr]esult*/
37 | [Bb]uild[Ll]og.*
38 |
39 | # NUNIT
40 | *.VisualState.xml
41 | TestResult.xml
42 |
43 | # Build Results of an ATL Project
44 | [Dd]ebugPS/
45 | [Rr]eleasePS/
46 | dlldata.c
47 |
48 | # Benchmark Results
49 | BenchmarkDotNet.Artifacts/
50 |
51 | # .NET Core
52 | project.lock.json
53 | project.fragment.lock.json
54 | artifacts/
55 | **/Properties/launchSettings.json
56 |
57 | # StyleCop
58 | StyleCopReport.xml
59 |
60 | # Files built by Visual Studio
61 | *_i.c
62 | *_p.c
63 | *_i.h
64 | *.ilk
65 | *.meta
66 | *.obj
67 | *.iobj
68 | *.pch
69 | *.pdb
70 | *.ipdb
71 | *.pgc
72 | *.pgd
73 | *.rsp
74 | *.sbr
75 | *.tlb
76 | *.tli
77 | *.tlh
78 | *.tmp
79 | *.tmp_proj
80 | *.log
81 | *.vspscc
82 | *.vssscc
83 | .builds
84 | *.pidb
85 | *.svclog
86 | *.scc
87 |
88 | # Chutzpah Test files
89 | _Chutzpah*
90 |
91 | # Visual C++ cache files
92 | ipch/
93 | *.aps
94 | *.ncb
95 | *.opendb
96 | *.opensdf
97 | *.sdf
98 | *.cachefile
99 | *.VC.db
100 | *.VC.VC.opendb
101 |
102 | # Visual Studio profiler
103 | *.psess
104 | *.vsp
105 | *.vspx
106 | *.sap
107 |
108 | # Visual Studio Trace Files
109 | *.e2e
110 |
111 | # TFS 2012 Local Workspace
112 | $tf/
113 |
114 | # Guidance Automation Toolkit
115 | *.gpState
116 |
117 | # ReSharper is a .NET coding add-in
118 | _ReSharper*/
119 | *.[Rr]e[Ss]harper
120 | *.DotSettings.user
121 |
122 | # JustCode is a .NET coding add-in
123 | .JustCode
124 |
125 | # TeamCity is a build add-in
126 | _TeamCity*
127 |
128 | # DotCover is a Code Coverage Tool
129 | *.dotCover
130 |
131 | # AxoCover is a Code Coverage Tool
132 | .axoCover/*
133 | !.axoCover/settings.json
134 |
135 | # Visual Studio code coverage results
136 | *.coverage
137 | *.coveragexml
138 |
139 | # NCrunch
140 | _NCrunch_*
141 | .*crunch*.local.xml
142 | nCrunchTemp_*
143 |
144 | # MightyMoose
145 | *.mm.*
146 | AutoTest.Net/
147 |
148 | # Web workbench (sass)
149 | .sass-cache/
150 |
151 | # Installshield output folder
152 | [Ee]xpress/
153 |
154 | # DocProject is a documentation generator add-in
155 | DocProject/buildhelp/
156 | DocProject/Help/*.HxT
157 | DocProject/Help/*.HxC
158 | DocProject/Help/*.hhc
159 | DocProject/Help/*.hhk
160 | DocProject/Help/*.hhp
161 | DocProject/Help/Html2
162 | DocProject/Help/html
163 |
164 | # Click-Once directory
165 | publish/
166 |
167 | # Publish Web Output
168 | *.[Pp]ublish.xml
169 | *.azurePubxml
170 | # Note: Comment the next line if you want to checkin your web deploy settings,
171 | # but database connection strings (with potential passwords) will be unencrypted
172 | *.pubxml
173 | *.publishproj
174 |
175 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
176 | # checkin your Azure Web App publish settings, but sensitive information contained
177 | # in these scripts will be unencrypted
178 | PublishScripts/
179 |
180 | # NuGet Packages
181 | *.nupkg
182 | # The packages folder can be ignored because of Package Restore
183 | **/[Pp]ackages/*
184 | # except build/, which is used as an MSBuild target.
185 | !**/[Pp]ackages/build/
186 | # Uncomment if necessary however generally it will be regenerated when needed
187 | #!**/[Pp]ackages/repositories.config
188 | # NuGet v3's project.json files produces more ignorable files
189 | *.nuget.props
190 | *.nuget.targets
191 |
192 | # Microsoft Azure Build Output
193 | csx/
194 | *.build.csdef
195 |
196 | # Microsoft Azure Emulator
197 | ecf/
198 | rcf/
199 |
200 | # Windows Store app package directories and files
201 | AppPackages/
202 | BundleArtifacts/
203 | Package.StoreAssociation.xml
204 | _pkginfo.txt
205 | *.appx
206 |
207 | # Visual Studio cache files
208 | # files ending in .cache can be ignored
209 | *.[Cc]ache
210 | # but keep track of directories ending in .cache
211 | !*.[Cc]ache/
212 |
213 | # Others
214 | ClientBin/
215 | ~$*
216 | *~
217 | *.dbmdl
218 | *.dbproj.schemaview
219 | *.jfm
220 | *.pfx
221 | *.publishsettings
222 | orleans.codegen.cs
223 |
224 | # Including strong name files can present a security risk
225 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
226 | #*.snk
227 |
228 | # Since there are multiple workflows, uncomment next line to ignore bower_components
229 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
230 | #bower_components/
231 |
232 | # RIA/Silverlight projects
233 | Generated_Code/
234 |
235 | # Backup & report files from converting an old project file
236 | # to a newer Visual Studio version. Backup files are not needed,
237 | # because we have git ;-)
238 | _UpgradeReport_Files/
239 | Backup*/
240 | UpgradeLog*.XML
241 | UpgradeLog*.htm
242 | ServiceFabricBackup/
243 | *.rptproj.bak
244 |
245 | # SQL Server files
246 | *.mdf
247 | *.ldf
248 | *.ndf
249 |
250 | # Business Intelligence projects
251 | *.rdl.data
252 | *.bim.layout
253 | *.bim_*.settings
254 | *.rptproj.rsuser
255 |
256 | # Microsoft Fakes
257 | FakesAssemblies/
258 |
259 | # GhostDoc plugin setting file
260 | *.GhostDoc.xml
261 |
262 | # Node.js Tools for Visual Studio
263 | .ntvs_analysis.dat
264 | node_modules/
265 |
266 | # Visual Studio 6 build log
267 | *.plg
268 |
269 | # Visual Studio 6 workspace options file
270 | *.opt
271 |
272 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
273 | *.vbw
274 |
275 | # Visual Studio LightSwitch build output
276 | **/*.HTMLClient/GeneratedArtifacts
277 | **/*.DesktopClient/GeneratedArtifacts
278 | **/*.DesktopClient/ModelManifest.xml
279 | **/*.Server/GeneratedArtifacts
280 | **/*.Server/ModelManifest.xml
281 | _Pvt_Extensions
282 |
283 | # Paket dependency manager
284 | .paket/paket.exe
285 | paket-files/
286 |
287 | # FAKE - F# Make
288 | .fake/
289 |
290 | # JetBrains Rider
291 | .idea/
292 | *.sln.iml
293 |
294 | # CodeRush
295 | .cr/
296 |
297 | # Python Tools for Visual Studio (PTVS)
298 | __pycache__/
299 | *.pyc
300 |
301 | # Cake - Uncomment if you are using it
302 | # tools/**
303 | # !tools/packages.config
304 |
305 | # Tabs Studio
306 | *.tss
307 |
308 | # Telerik's JustMock configuration file
309 | *.jmconfig
310 |
311 | # BizTalk build output
312 | *.btp.cs
313 | *.btm.cs
314 | *.odx.cs
315 | *.xsd.cs
316 |
317 | # OpenCover UI analysis results
318 | OpenCover/
319 |
320 | # Azure Stream Analytics local run output
321 | ASALocalRun/
322 |
323 | # MSBuild Binary and Structured Log
324 | *.binlog
325 |
326 | # NVidia Nsight GPU debugger configuration file
327 | *.nvuser
328 |
329 | # MFractors (Xamarin productivity tool) working folder
330 | .mfractor/
331 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Leverson Carlos
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HttpZipStream
2 | A simple library to extract specific entries from a remote http zip archive without the need to download the entire file.
3 | 
4 |
5 | ## Understanding the magic
6 | When a zip archive is opened from a remote URL, a regular zip library has to download the entire file before it can read its contents. So if you have a 90 MB zip file and want only a 100 KB file from within it, you end up downloading the entire 90 MB anyway.
7 | The [zip format](https://en.wikipedia.org/wiki/Zip_(file_format)) defines a directory that points to all of its inner entries and contains properties such as their names, starting offsets, sizes, and other details. This directory is pretty small, just a few bytes placed at the very end of the archive. So, if we could read just this directory, we would know where in the zip archive the file we want is stored.
8 | And if we could request from the remote URL just that part of the content, we would get a much smaller download containing only what we want and need.
9 | It turns out that the HTTP protocol supports a technique called [byte serving](https://en.wikipedia.org/wiki/Byte_serving), which lets us add header parameters to the HTTP request specifying the byte ranges we want for that request.
10 | With that in mind, what we do is pretty simple. We make a first HTTP request asking just for the HTTP headers (not the content), and from that we know the content size. Then we make small range requests at the end of the file, extracting all the directory info. Then, for the entries we want, we make requests for just those ranges. Apply the deflate algorithm and it's done.
11 | With this approach we end up making more HTTP requests, so it is only worth using when the desired content represents a small part of the entire zip archive.
12 | More on this can be found in my [medium](https://medium.com/@lcjohnny/httpzipstream-extracting-single-entry-from-remote-zip-without-downloading-the-entire-file-7a0f3d24a6fc) article.
13 |
14 | ## Install instructions
15 | You can add the library to your project using the [nuget](https://www.nuget.org/packages/HttpZipStream) package:
16 | ```
17 | dotnet add package HttpZipStream
18 | ```
19 |
20 | ## Sample of how to use the library
21 | Extracting just the first entry from a remote zip archive:
22 | ```csharp
23 | var httpUrl = "http://MyRemoteFile.zip";
24 | using (var zipStream = new System.IO.Compression.HttpZipStream(httpUrl))
25 | {
26 | var entryList = await zipStream.GetEntriesAsync();
27 | var entry = entryList.FirstOrDefault();
28 | byte[] entryContent = await zipStream.ExtractAsync(entry);
29 | /* do what you want with the entry content */
30 | }
31 | ```
32 |
33 | ## Built using
34 | * [DotNET Core](https://dotnet.github.io)
35 | * [xUnit](https://xunit.github.io)
36 | * [vsCode](https://github.com/Microsoft/vscode)
37 | * [ZipFormat](https://en.wikipedia.org/wiki/Zip_(file_format))
38 |
39 | ## Changelog
40 | ### v0.1.*
41 | - Some minor documentation adjustments.
42 | - Proper name convention for async methods.
43 | - Preparing projects to be built, packed and deployed by the server.
44 | ### v0.2.*
45 | - Implementing an ExtractAsync overload that returns just the entry content as a byte array.
46 | - BUG #13: Some entries are not deflated correctly.
47 | ### v0.3.*
48 | - Upgrading dotnet version to 3.1
49 |
50 |
51 | ## Authors
52 | * [Leverson Carlos](https://github.com/LeversonCarlos)
53 |
54 | ## License
55 | MIT License - see the [LICENSE](LICENSE) file for details
56 |
--------------------------------------------------------------------------------
/srcs/HttpZipDirectory.cs:
--------------------------------------------------------------------------------
namespace System.IO.Compression
{
    /// <summary>
    /// Holds the three values that <c>HttpZipStream</c> reads from the archive's
    /// end-of-central-directory record: where the central directory starts,
    /// how many bytes it spans and how many entries it lists.
    /// </summary>
    internal class HttpZipDirectory
    {
        /// <summary>
        /// Number of entries listed in the central directory
        /// (read as a 16-bit value from byte 10 of the record).
        /// </summary>
        public short Entries { get; set; }

        /// <summary>
        /// Size of the central directory in bytes
        /// (read as a 32-bit value from byte 12 of the record).
        /// </summary>
        public int Size { get; set; }

        /// <summary>
        /// Offset of the central directory from the start of the archive
        /// (read as a 32-bit value from byte 16 of the record).
        /// The locator initialises it to -1 before searching.
        /// </summary>
        public int Offset { get; set; }
    }
}
--------------------------------------------------------------------------------
/srcs/HttpZipEntry.cs:
--------------------------------------------------------------------------------
namespace System.IO.Compression
{
    /// <summary>
    /// Describes one entry of a remote zip archive. Instances are created and
    /// populated by <c>HttpZipStream.GetEntriesAsync</c> from the archive's central
    /// directory; consumers can only read the public members.
    /// </summary>
    public class HttpZipEntry
    {
        /// <summary>Creates an entry bound to its position in the central directory.</summary>
        /// <param name="index">Zero-based position of the entry in the central directory.</param>
        internal HttpZipEntry(int index)
        {
            Index = index;
        }

        // ---- Identity ------------------------------------------------------

        /// <summary>Zero-based position of the entry in the central directory.</summary>
        public int Index { get; }

        /// <summary>Entry name, decoded from the bytes that follow the fixed-size part of the record.</summary>
        public string FileName { get; internal set; }

        /// <summary>Extra field bytes of the record, decoded as text.</summary>
        public string ExtraField { get; internal set; }

        /// <summary>Entry comment, decoded from the bytes that follow the extra field.</summary>
        public string FileComment { get; internal set; }

        // ---- Content description -------------------------------------------

        /// <summary>Compression method code; the extractor handles 0 (stored) and 8 (deflated).</summary>
        public short CompressionMethod { get; internal set; }

        /// <summary>Raw, undecoded 32-bit last-modification value exactly as stored in the record.</summary>
        public int FileLastModification { get; internal set; }

        /// <summary>CRC-32 value recorded for the entry.</summary>
        public int CRC32 { get; internal set; }

        /// <summary>Number of bytes the entry occupies inside the archive.</summary>
        public int CompressedSize { get; internal set; }

        /// <summary>Number of bytes of the entry once extracted.</summary>
        public int UncompressedSize { get; internal set; }

        // ---- Raw record fields used internally -----------------------------

        internal int Signature { get; set; }
        internal short VersionMadeBy { get; set; }
        internal short MinimumVersionNeededToExtract { get; set; }
        internal short GeneralPurposeBitFlag { get; set; }

        internal short FileNameLength { get; set; }
        internal short ExtraFieldLength { get; set; }
        internal short FileCommentLength { get; set; }

        internal short DiskNumberWhereFileStarts { get; set; }
        internal short InternalFileAttributes { get; set; }
        internal int ExternalFileAttributes { get; set; }

        /// <summary>Offset of the entry's local file header from the start of the archive.</summary>
        internal int FileOffset { get; set; }
    }
}
--------------------------------------------------------------------------------
/srcs/HttpZipStream.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Threading.Tasks;

namespace System.IO.Compression
{
    /// <summary>
    /// Reads the central directory of a remote zip archive and extracts single
    /// entries from it using HTTP range requests, so the archive never has to be
    /// downloaded as a whole.
    /// </summary>
    public class HttpZipStream : IDisposable
    {

        // Fixed sizes and signatures of the zip records this class parses.
        // https://en.wikipedia.org/wiki/Zip_(file_format)
        private const int EndOfCentralDirectorySize = 22;
        private const int CentralDirectoryHeaderSize = 46;
        private const int LocalFileHeaderSize = 30;
        private const int CentralDirectoryHeaderSignature = 0x02014b50;
        private const int LocalFileHeaderSignature = 0x04034b50;

        // General purpose bit 11: file name and comment are encoded as UTF-8.
        private const int Utf8TextFlag = 0x0800;


        string httpUrl { get; set; }
        HttpClient httpClient { get; set; }
        bool LeaveHttpClientOpen { get; set; }

        /// <summary>
        /// Opens the archive at <paramref name="httpUrl"/> using a private
        /// <see cref="HttpClient"/> that is disposed together with this instance.
        /// </summary>
        /// <param name="httpUrl">Address of the remote zip archive.</param>
        public HttpZipStream(string httpUrl) : this(httpUrl, new HttpClient(), false) { }

        /// <summary>
        /// Opens the archive at <paramref name="httpUrl"/> using the supplied client.
        /// The client is disposed together with this instance; use the overload that
        /// takes <c>leaveHttpClientOpen</c> to keep a shared client alive.
        /// </summary>
        /// <param name="httpUrl">Address of the remote zip archive.</param>
        /// <param name="httpClient">Client used for every request.</param>
        public HttpZipStream(string httpUrl, HttpClient httpClient) : this(httpUrl, httpClient, false) { }

        /// <summary>
        /// Opens the archive at <paramref name="httpUrl"/> using the supplied client.
        /// </summary>
        /// <param name="httpUrl">Address of the remote zip archive.</param>
        /// <param name="httpClient">Client used for every request.</param>
        /// <param name="leaveHttpClientOpen">
        /// <c>true</c> to leave <paramref name="httpClient"/> untouched when this instance is disposed.
        /// </param>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public HttpZipStream(string httpUrl, HttpClient httpClient, bool leaveHttpClientOpen)
        {
            if (httpUrl == null) { throw new ArgumentNullException(nameof(httpUrl)); }
            if (httpClient == null) { throw new ArgumentNullException(nameof(httpClient)); }
            this.httpUrl = httpUrl;
            this.httpClient = httpClient;
            this.LeaveHttpClientOpen = leaveHttpClientOpen;
        }


        /// <summary>Total size of the remote archive in bytes, or -1 while unknown.</summary>
        public long ContentLength { get; private set; } = -1;

        /// <summary>
        /// Manually setting the content length is only recommended if you truly know what you're doing.
        /// It saves the request that discovers the length, but a wrong value invalidates every following request.
        /// </summary>
        /// <param name="value">Size of the remote archive in bytes.</param>
        public void SetContentLength(long value) { this.ContentLength = value; }

        /// <summary>
        /// Returns the size of the remote archive, asking the server for it on the first call.
        /// </summary>
        /// <returns>
        /// The archive size in bytes, or -1 when the server answers with a failure status
        /// or does not report a content length.
        /// </returns>
        public async Task<long> GetContentLengthAsync()
        {
            if (this.ContentLength != -1) { return this.ContentLength; }

            // Only the response headers are needed, the body is never read.
            using (var request = this.CreateRequest(null, null))
            using (var httpMessage = await this.httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead).ConfigureAwait(false))
            {
                if (!httpMessage.IsSuccessStatusCode) { return -1; }
                this.ContentLength = httpMessage.Content.Headers.ContentLength ?? -1;
                return this.ContentLength;
            }
        }


        /// <summary>
        /// Builds a GET request for the archive. Headers are set per request so that a
        /// shared <see cref="HttpClient"/> never carries a stale Range header into other calls.
        /// </summary>
        private HttpRequestMessage CreateRequest(long? rangeStart, long? rangeFinish)
        {
            var request = new HttpRequestMessage(HttpMethod.Get, this.httpUrl);
            request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/octet-stream"));
            if (rangeStart.HasValue) { request.Headers.Range = new RangeHeaderValue(rangeStart, rangeFinish); }
            return request;
        }

        /// <summary>
        /// Downloads the bytes from <paramref name="rangeStart"/> to
        /// <paramref name="rangeFinish"/>, both inclusive as required by the Range header.
        /// </summary>
        /// <exception cref="HttpRequestException">The server answered with a failure status.</exception>
        private async Task<byte[]> GetRangeAsync(long rangeStart, long rangeFinish)
        {
            using (var request = this.CreateRequest(rangeStart, rangeFinish))
            using (var response = await this.httpClient.SendAsync(request).ConfigureAwait(false))
            {
                response.EnsureSuccessStatusCode();
                return await response.Content.ReadAsByteArrayAsync().ConfigureAwait(false);
            }
        }


        HttpZipDirectory directoryData { get; set; }

        /// <summary>
        /// Searches the tail of the archive for the end-of-central-directory record
        /// and stores the directory offset, size and entry count it announces.
        /// </summary>
        /// <returns><c>true</c> when the record was found; otherwise <c>false</c>.</returns>
        private async Task<bool> LocateDirectoryAsync()
        {

            // INITIALIZE
            this.directoryData = new HttpZipDirectory { Offset = -1 };
            const int secureMargin = EndOfCentralDirectorySize;
            const int chunkSize = 256;
            if (this.ContentLength < secureMargin) { return false; }
            long rangeStart = this.ContentLength - secureMargin;
            long rangeFinish = this.ContentLength - 1; // the range end is inclusive

            // TRY TO FIND THE CENTRAL DIRECTORY FOUR TIMES, SLOWLY INCREASING THE CHUNK SIZE
            for (short tries = 1; tries <= 4; tries++)
            {

                // MAKE A HTTP CALL USING THE RANGE HEADER (never reaching before the first byte)
                rangeStart = Math.Max(0, rangeStart - (chunkSize * tries));
                var byteArray = await this.GetRangeAsync(rangeStart, rangeFinish).ConfigureAwait(false);

                // TRY TO LOCATE THE END OF CENTRAL DIRECTORY DEFINED BY
                // 50 4B 05 06
                // https://en.wikipedia.org/wiki/Zip_(file_format)#End_of_central_directory_record_(EOCD)
                for (int pos = byteArray.Length - secureMargin; pos >= 0; pos--)
                {
                    if (byteArray[pos + 0] == 0x50 &&
                        byteArray[pos + 1] == 0x4b &&
                        byteArray[pos + 2] == 0x05 &&
                        byteArray[pos + 3] == 0x06)
                    {
                        this.directoryData.Entries = BitConverter.ToInt16(byteArray, pos + 10);
                        this.directoryData.Size = BitConverter.ToInt32(byteArray, pos + 12);
                        this.directoryData.Offset = BitConverter.ToInt32(byteArray, pos + 16);
                        return true;
                    }
                }

                // The whole archive has already been scanned, nothing more to fetch.
                if (rangeStart == 0) { break; }
            }

            return false;
        }


        /// <summary>
        /// Downloads the central directory of the archive and returns its entries.
        /// </summary>
        /// <returns>
        /// The list of entries, or <c>null</c> when the archive size cannot be determined
        /// or the central directory cannot be located.
        /// </returns>
        /// <exception cref="InvalidDataException">The central directory is truncated or malformed.</exception>
        public async Task<List<HttpZipEntry>> GetEntriesAsync()
        {
            // INITIALIZE
            var entryList = new List<HttpZipEntry>();
            if (await this.GetContentLengthAsync().ConfigureAwait(false) == -1) { return null; }
            if (await this.LocateDirectoryAsync().ConfigureAwait(false) == false) { return null; }

            // The record stores unsigned values; reinterpret them so large values do not turn negative.
            long directoryOffset = unchecked((uint)this.directoryData.Offset);
            long directorySize = unchecked((uint)this.directoryData.Size);
            int entryCount = unchecked((ushort)this.directoryData.Entries);
            if (entryCount == 0 || directorySize == 0) { return entryList; }

            // MAKE A HTTP CALL USING THE RANGE HEADER (inclusive end)
            var byteArray = await this.GetRangeAsync(directoryOffset, directoryOffset + directorySize - 1).ConfigureAwait(false);

            // LOOP THROUGH ENTRIES
            var entriesOffset = 0;
            for (int entryIndex = 0; entryIndex < entryCount; entryIndex++)
            {
                if (entriesOffset + CentralDirectoryHeaderSize > byteArray.Length)
                { throw new InvalidDataException("The central directory is truncated."); }

                var entry = new HttpZipEntry(entryIndex);
                // https://en.wikipedia.org/wiki/Zip_(file_format)#Central_directory_file_header

                entry.Signature = BitConverter.ToInt32(byteArray, entriesOffset + 0); // 0x02014b50
                if (entry.Signature != CentralDirectoryHeaderSignature)
                { throw new InvalidDataException($"The central directory entry [{entryIndex}] has an invalid signature."); }
                entry.VersionMadeBy = BitConverter.ToInt16(byteArray, entriesOffset + 4);
                entry.MinimumVersionNeededToExtract = BitConverter.ToInt16(byteArray, entriesOffset + 6);
                entry.GeneralPurposeBitFlag = BitConverter.ToInt16(byteArray, entriesOffset + 8);

                entry.CompressionMethod = BitConverter.ToInt16(byteArray, entriesOffset + 10);
                entry.FileLastModification = BitConverter.ToInt32(byteArray, entriesOffset + 12);
                entry.CRC32 = BitConverter.ToInt32(byteArray, entriesOffset + 16);
                entry.CompressedSize = BitConverter.ToInt32(byteArray, entriesOffset + 20);
                entry.UncompressedSize = BitConverter.ToInt32(byteArray, entriesOffset + 24);

                entry.FileNameLength = BitConverter.ToInt16(byteArray, entriesOffset + 28); // (n)
                entry.ExtraFieldLength = BitConverter.ToInt16(byteArray, entriesOffset + 30); // (m)
                entry.FileCommentLength = BitConverter.ToInt16(byteArray, entriesOffset + 32); // (k)

                entry.DiskNumberWhereFileStarts = BitConverter.ToInt16(byteArray, entriesOffset + 34);
                entry.InternalFileAttributes = BitConverter.ToInt16(byteArray, entriesOffset + 36);
                entry.ExternalFileAttributes = BitConverter.ToInt32(byteArray, entriesOffset + 38);
                entry.FileOffset = BitConverter.ToInt32(byteArray, entriesOffset + 42);

                // Lengths are unsigned 16-bit values in the record.
                int fileNameLength = unchecked((ushort)entry.FileNameLength);
                int extraFieldLength = unchecked((ushort)entry.ExtraFieldLength);
                int fileCommentLength = unchecked((ushort)entry.FileCommentLength);

                var fileNameStart = entriesOffset + CentralDirectoryHeaderSize;
                var extraFieldStart = fileNameStart + fileNameLength;
                var fileCommentStart = extraFieldStart + extraFieldLength;
                var nextEntryOffset = fileCommentStart + fileCommentLength;
                if (nextEntryOffset > byteArray.Length)
                { throw new InvalidDataException("The central directory is truncated."); }

                // Honour the UTF-8 flag for name and comment, otherwise keep the platform default encoding.
                var textEncoding = (entry.GeneralPurposeBitFlag & Utf8TextFlag) != 0
                    ? System.Text.Encoding.UTF8
                    : System.Text.Encoding.Default;
                entry.FileName = textEncoding.GetString(byteArray, fileNameStart, fileNameLength);
                entry.ExtraField = System.Text.Encoding.Default.GetString(byteArray, extraFieldStart, extraFieldLength);
                entry.FileComment = textEncoding.GetString(byteArray, fileCommentStart, fileCommentLength);

                entryList.Add(entry);
                entriesOffset = nextEntryOffset;
            }

            // RESULT
            return entryList;
        }


        /// <summary>
        /// Extracts every entry of <paramref name="entryList"/> in order, handing each
        /// result to <paramref name="resultCallback"/>.
        /// </summary>
        [Obsolete]
        public async Task ExtractAsync(List<HttpZipEntry> entryList, Action<MemoryStream> resultCallback)
        {
            foreach (var entry in entryList)
            { await this.ExtractAsync(entry, resultCallback); }
        }

        /// <summary>
        /// Extracts <paramref name="entry"/> and hands its content to
        /// <paramref name="resultCallback"/> as a stream positioned at the start.
        /// </summary>
        public async Task ExtractAsync(HttpZipEntry entry, Action<MemoryStream> resultCallback)
        {
            // No ConfigureAwait(false) here: the callback runs on the caller's context.
            var fileDataBuffer = await this.ExtractAsync(entry);
            var resultStream = new MemoryStream(fileDataBuffer);
            resultStream.Position = 0;
            resultCallback.Invoke(resultStream);
        }

        /// <summary>
        /// Downloads only the bytes of <paramref name="entry"/> and returns its extracted content.
        /// </summary>
        /// <param name="entry">An entry previously returned by <see cref="GetEntriesAsync"/>.</param>
        /// <returns>The uncompressed content of the entry.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="entry"/> is null.</exception>
        /// <exception cref="InvalidDataException">The local header or the compressed data is malformed.</exception>
        /// <exception cref="NotSupportedException">
        /// The entry uses a compression method other than stored (0) or deflated (8),
        /// or is too large to be buffered in memory.
        /// </exception>
        public async Task<byte[]> ExtractAsync(HttpZipEntry entry)
        {
            if (entry == null) { throw new ArgumentNullException(nameof(entry)); }
            if (entry.CompressedSize < 0 || entry.UncompressedSize < 0)
            { throw new NotSupportedException("Entries of 2 GB or more cannot be buffered in memory."); }

            // MAKE A HTTP CALL USING THE RANGE HEADER
            // The header length is only an estimate taken from the central directory;
            // the local header may carry an extra field of a different length.
            long entryOffset = unchecked((uint)entry.FileOffset);
            int fileDataSize = entry.CompressedSize;
            int expectedHeaderLength = LocalFileHeaderSize
                + unchecked((ushort)entry.FileNameLength)
                + unchecked((ushort)entry.ExtraFieldLength);
            var byteArray = await this.GetRangeAsync(entryOffset, entryOffset + expectedHeaderLength + fileDataSize - 1).ConfigureAwait(false);

            // LOCATE DATA BOUNDS
            // https://en.wikipedia.org/wiki/Zip_(file_format)#Local_file_header
            if (byteArray.Length < LocalFileHeaderSize || BitConverter.ToInt32(byteArray, 0) != LocalFileHeaderSignature)
            { throw new InvalidDataException($"The local file header of entry [{entry.Index}] is invalid."); }
            int fileNameLength = BitConverter.ToUInt16(byteArray, 26); // (n)
            int extraFieldLength = BitConverter.ToUInt16(byteArray, 28); // (m)
            int fileDataOffset = LocalFileHeaderSize + fileNameLength + extraFieldLength;

            // EXTRACT DATA BUFFER
            byte[] fileDataBuffer;
            if (byteArray.Length - fileDataOffset >= fileDataSize)
            {
                fileDataBuffer = new byte[fileDataSize];
                Array.Copy(byteArray, fileDataOffset, fileDataBuffer, 0, fileDataSize);
            }
            else if (fileDataSize == 0)
            {
                fileDataBuffer = new byte[0];
            }
            else
            {
                // The local header was longer than estimated, so part of the data is missing:
                // fetch exactly the data range now that its real position is known.
                long dataStart = entryOffset + fileDataOffset;
                fileDataBuffer = await this.GetRangeAsync(dataStart, dataStart + fileDataSize - 1).ConfigureAwait(false);
                if (fileDataBuffer.Length < fileDataSize)
                { throw new InvalidDataException($"The data of entry [{entry.Index}] is truncated."); }
            }
            byteArray = null;

            /* STORED */
            if (entry.CompressionMethod == 0)
            { return fileDataBuffer; }

            /* DEFLATED */
            if (entry.CompressionMethod == 8)
            {
                var deflatedArray = new byte[entry.UncompressedSize];
                using (var compressedStream = new MemoryStream(fileDataBuffer))
                using (var deflateStream = new System.IO.Compression.DeflateStream(compressedStream, CompressionMode.Decompress))
                {
                    // A single read may return fewer bytes than requested, keep reading until the buffer is full.
                    var totalRead = 0;
                    while (totalRead < deflatedArray.Length)
                    {
                        var bytesRead = await deflateStream.ReadAsync(deflatedArray, totalRead, deflatedArray.Length - totalRead).ConfigureAwait(false);
                        if (bytesRead == 0) { break; }
                        totalRead += bytesRead;
                    }
                    if (totalRead != deflatedArray.Length)
                    { throw new InvalidDataException($"The data of entry [{entry.Index}] ended before the announced uncompressed size."); }
                }
                return deflatedArray;
            }

            // NOT SUPPORTED COMPRESSION METHOD
            throw new NotSupportedException($"The compression method [{entry.CompressionMethod}] is not supported");
        }


        /// <summary>
        /// Releases the http client (unless it was asked to be left open) and forgets
        /// the cached directory data and content length. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            if (!this.LeaveHttpClientOpen) { this.httpClient?.Dispose(); }
            this.httpClient = null;
            this.directoryData = null;
            this.ContentLength = -1;
        }


    }
}
--------------------------------------------------------------------------------
/srcs/HttpZipStream.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | netstandard2.0
5 | Debug;Release
6 |
7 |
8 |
9 | HttpZipStream
10 | 0
11 | 0.4.$(BuildVersion)
12 | A simple library to extract specific entries from a remote http zip archive without the need to download the entire file
13 | Leverson Carlos;lcjohnny
14 | Leverson Carlos
15 | https://github.com/LeversonCarlos/HttpZipStream
16 | en
17 | httpzip remotezip zipstream httpzipstream http zip
18 | MIT
19 | false
20 | snupkg
21 |
22 |
23 |
24 | full
25 | true
26 |
27 |
28 |
29 | pdbonly
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/test/HttpZipStream.Test.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | netcoreapp3.1
5 |
6 | false
7 |
8 |
9 |
10 |
11 |
12 |
13 | all
14 | runtime; build; native; contentfiles; analyzers; buildtransitive
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/test/HttpZipStreamTest.cs:
--------------------------------------------------------------------------------
using System;
using System.Linq;
using System.Threading.Tasks;
using Xunit;

namespace System.IO.Compression
{
    /// <summary>
    /// Integration tests that run against a sample archive published online;
    /// they require network access and the sample file to remain unchanged.
    /// </summary>
    public class HttpZipStreamTest
    {
        string httpUrl = "https://onedrive.live.com/download.aspx?cid=ADED24162E9A6538&authKey=%21ABqNji2NaV0MT58&resid=ADED24162E9A6538%21107&ithint=%2Ecbz";


        // Tests return Task (not void) so the runner can await them and observe failures.
        [Fact]
        public async Task ExampleStream_ContentLength_MustBe_9702kbytes()
        {
            using (var streamZip = new HttpZipStream(httpUrl))
            {
                var contentLength = await streamZip.GetContentLengthAsync();
                Assert.Equal(9935427, contentLength);
            }
        }


        [Fact]
        public async Task ExampleStream_Entries_MustHave_36items()
        {
            using (var streamZip = new HttpZipStream(httpUrl))
            {
                // GetEntriesAsync resolves the content length itself and returns null on failure.
                var entryList = await streamZip.GetEntriesAsync();
                Assert.NotNull(entryList);
                Assert.Equal(36, entryList.Count);
            }
        }


        [Fact]
        public async Task ExampleStream_LargerEntry_MustBe_0001_With_347kbytes()
        {
            using (var streamZip = new HttpZipStream(httpUrl))
            {
                var entryList = await streamZip.GetEntriesAsync();
                Assert.NotNull(entryList);
                var largerEntry = entryList
                    .OrderByDescending(x => x.CompressedSize)
                    .First();
                Assert.Equal("Blue Beetle [1967] #01 - 0001.jpg", largerEntry.FileName);
                Assert.Equal(355736, largerEntry.CompressedSize);
            }
        }


        [Fact]
        public async Task ExampleStream_SmallerEntryExtraction_MustResult_MemoryStream_With_227kbytes()
        {
            using (var streamZip = new HttpZipStream(httpUrl))
            {
                var entryList = await streamZip.GetEntriesAsync();
                Assert.NotNull(entryList);
                var smallerEntry = entryList
                    .OrderBy(x => x.CompressedSize)
                    .First();
                long memoryStreamLength = 0;
                await streamZip.ExtractAsync(smallerEntry, (MemoryStream memoryStream) =>
                {
                    memoryStreamLength = memoryStream.Length;
                });
                Assert.Equal(232723, memoryStreamLength);
            }
        }


    }
}
--------------------------------------------------------------------------------