├── README.md ├── xpdf.pri ├── xpdf.cmake ├── LICENSE ├── xpdf_def.h ├── xpdf.h └── xpdf.cpp /README.md: -------------------------------------------------------------------------------- 1 | # XPDF 2 | -------------------------------------------------------------------------------- /xpdf.pri: -------------------------------------------------------------------------------- 1 | INCLUDEPATH += $$PWD 2 | DEPENDPATH += $$PWD 3 | 4 | HEADERS += \ 5 | $$PWD/xpdf.h \ 6 | $$PWD/xpdf_def.h 7 | 8 | SOURCES += \ 9 | $$PWD/xpdf.cpp 10 | 11 | !contains(XCONFIG, xbinary) { 12 | XCONFIG += xbinary 13 | include($$PWD/../Formats/xbinary.pri) 14 | } 15 | 16 | DISTFILES += \ 17 | $$PWD/LICENSE \ 18 | $$PWD/README.md \ 19 | $$PWD/xpdf.cmake 20 | -------------------------------------------------------------------------------- /xpdf.cmake: -------------------------------------------------------------------------------- 1 | include_directories(${CMAKE_CURRENT_LIST_DIR}) 2 | 3 | if (NOT DEFINED XBINARY_SOURCES) 4 | include(${CMAKE_CURRENT_LIST_DIR}/../Formats/xbinary.cmake) 5 | set(XPDF_SOURCES ${XPDF_SOURCES} ${XBINARY_SOURCES}) 6 | endif() 7 | 8 | set(XPDF_SOURCES 9 | ${XPDF_SOURCES} 10 | ${CMAKE_CURRENT_LIST_DIR}/xpdf.cpp 11 | ${CMAKE_CURRENT_LIST_DIR}/xpdf.h 12 | ${CMAKE_CURRENT_LIST_DIR}/xpdf_def.h 13 | ) 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022-2025 hors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /xpdf_def.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2022-2025 hors 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to deal 5 | * in the Software without restriction, including without limitation the rights 6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | * copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | * SOFTWARE. 20 | */ 21 | #ifndef XPDF_DEF_H 22 | #define XPDF_DEF_H 23 | 24 | #include 25 | 26 | namespace XPDF_DEF { 27 | // Intentionally left empty: project-specific type aliases / defs may be added here. 28 | } // namespace XPDF_DEF 29 | #endif // XPDF_DEF_H 30 | -------------------------------------------------------------------------------- /xpdf.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2022-2025 hors 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to deal 5 | * in the Software without restriction, including without limitation the rights 6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | * copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | * SOFTWARE. 20 | */ 21 | #ifndef XPDF_H 22 | #define XPDF_H 23 | 24 | #include "xbinary.h" 25 | #include "xpdf_def.h" 26 | #include "xarchive.h" 27 | 28 | class XPDF : public XBinary { 29 | Q_OBJECT 30 | 31 | public: 32 | enum TYPE { 33 | TYPE_UNKNOWN = 0, 34 | TYPE_DOCUMENT 35 | }; 36 | 37 | struct STARTHREF { 38 | qint64 nXrefOffset; 39 | qint64 nFooterOffset; 40 | qint64 nFooterSize; 41 | bool bIsXref; 42 | bool bIsObject; 43 | }; 44 | 45 | struct OBJECT { 46 | quint64 nID; 47 | qint64 nOffset; 48 | qint64 nSize; 49 | }; 50 | 51 | struct STREAM { 52 | qint64 nOffset; 53 | qint64 nSize; 54 | }; 55 | 56 | struct XPART { 57 | quint64 nID; 58 | qint64 nOffset; 59 | qint64 nSize; 60 | QList listParts; 61 | QList listStreams; 62 | }; 63 | 64 | XPDF(QIODevice *pDevice); 65 | virtual ~XPDF(); 66 | 67 | virtual bool isValid(PDSTRUCT *pPdStruct = nullptr) override; 68 | virtual QString getVersion() override; 69 | virtual FT getFileType() override; 70 | virtual ENDIAN getEndian() override; 71 | virtual qint64 getFileFormatSize(PDSTRUCT *pPdStruct) override; 72 | virtual QString getFileFormatExt() override; 73 | virtual QString getFileFormatExtsString() override; 74 | virtual MODE getMode() override; 75 | virtual QString getMIMEString() override; 76 | 77 | virtual QList getMapModesList() override; 78 | virtual _MEMORY_MAP getMemoryMap(MAPMODE mapMode = MAPMODE_UNKNOWN, PDSTRUCT *pPdStruct = nullptr) override; 79 | 80 | QList findStartxrefs(qint64 nOffset, PDSTRUCT *pPdStruct); 81 | QList getObjectsFromStartxref(const STARTHREF *pStartxref, PDSTRUCT *pPdStruct); 82 | QList findObjects(qint64 nOffset, qint64 nSize, bool bDeepScan, PDSTRUCT *pPdStruct); 83 | OS_STRING _readPDFString(qint64 nOffset, qint64 nSize, PDSTRUCT *pPdStruct); 84 | OS_STRING _readPDFStringPart_title(qint64 nOffset, qint64 nSize, PDSTRUCT *pPdStruct); 85 | OS_STRING _readPDFStringPart(qint64 nOffset, PDSTRUCT *pPdStruct); 86 | OS_STRING _readPDFStringPart_const(qint64 nOffset, PDSTRUCT *pPdStruct); 87 | OS_STRING _readPDFStringPart_str(qint64 nOffset, PDSTRUCT *pPdStruct); 88 | OS_STRING _readPDFStringPart_val(qint64 nOffset, PDSTRUCT *pPdStruct); 89 | OS_STRING _readPDFStringPart_hex(qint64 nOffset, PDSTRUCT *pPdStruct); 90 | qint32 skipPDFEnding(qint64 *pnOffset, PDSTRUCT *pPdStruct); 91 | qint32 skipPDFSpace(qint64 *pnOffset, PDSTRUCT *pPdStruct); 92 | qint32 skipPDFString(qint64 *pnOffset, PDSTRUCT *pPdStruct); 93 | XPART handleXpart(qint64 nOffset, qint32 nID, qint32 nPartLimit, PDSTRUCT *pPdStruct); 94 | static bool _isObject(const QString &sString); 95 | static bool _isString(const QString &sString); 96 | static bool _isHex(const QString &sString); 97 | static bool _isDateTime(const QString &sString); 98 | static bool _isEndObject(const QString &sString); 99 | static bool _isComment(const QString &sString); 100 | static bool _isXref(const QString &sString); 101 | static QString _getCommentString(const QString &sString); 102 | static QString _getString(const QString &sString); 103 | static QString _getHex(const QString &sString); 104 | static QDateTime _getDateTime(const QString &sString); 105 | static qint32 getObjectID(const QString &sString); 106 | 107 | QList getParts(qint32 nPartLimit, PDSTRUCT *pPdStruct = nullptr); 108 | static QList getValuesByKey(QList *pListObjects, const QString &sKey, PDSTRUCT *pPdStruct = nullptr); 109 | static XVARIANT getFirstStringValueByKey(QList *pListStrings, const QString &sKey, PDSTRUCT *pPdStruct = nullptr); 110 | 111 | virtual qint32 getType() override; 112 | virtual QString typeIdToString(qint32 nType) override; 113 | 114 | QString getHeaderCommentAsHex(PDSTRUCT *pPdStruct); 115 | 116 | virtual QList getFileParts(quint32 nFileParts, qint32 nLimit = -1, PDSTRUCT *pPdStruct = nullptr) override; 117 | 118 | QString getFilters(PDSTRUCT *pPdStruct = nullptr); 119 | virtual QString getInfo(PDSTRUCT *pPdStruct = nullptr) override; 120 | 121 | // Streaming unpack API 122 | virtual bool initUnpack(UNPACK_STATE *pState, const QMap &mapProperties, PDSTRUCT *pPdStruct = nullptr) override; 123 | virtual ARCHIVERECORD infoCurrent(UNPACK_STATE *pState, PDSTRUCT *pPdStruct = nullptr) override; 124 | virtual bool unpackCurrent(UNPACK_STATE *pState, QIODevice *pDevice, PDSTRUCT *pPdStruct = nullptr) override; 125 | virtual bool moveToNext(UNPACK_STATE *pState, PDSTRUCT *pPdStruct = nullptr) override; 126 | virtual bool finishUnpack(UNPACK_STATE *pState, PDSTRUCT *pPdStruct = nullptr) override; 127 | 128 | private: 129 | struct UNPACK_CONTEXT { 130 | QList listStreams; 131 | qint32 nCurrentStreamIndex; 132 | }; 133 | }; 134 | 135 | #endif // XPDF_H 136 | -------------------------------------------------------------------------------- /xpdf.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2022-2025 hors 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to deal 5 | * in the Software without restriction, including without limitation the rights 6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | * copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | * SOFTWARE. 20 | */ 21 | #include "xpdf.h" 22 | 23 | XPDF::XPDF(QIODevice *pDevice) : XBinary(pDevice) 24 | { 25 | } 26 | 27 | XPDF::~XPDF() 28 | { 29 | } 30 | 31 | bool XPDF::isValid(PDSTRUCT *pPdStruct) 32 | { 33 | Q_UNUSED(pPdStruct) 34 | 35 | bool bResult = false; 36 | qint64 nFileSize = getSize(); 37 | // TODO more checks !!! 38 | // 1.0-2.0 39 | // %PDF- 40 | if (nFileSize > 4) { 41 | quint32 nHeader = read_uint32(0); 42 | if (nHeader == 0x46445025) // '%PDF' 43 | { 44 | bResult = true; 45 | } 46 | } 47 | 48 | return bResult; 49 | } 50 | 51 | QString XPDF::getVersion() 52 | { 53 | QString sResult; 54 | sResult = _readPDFString(5, 3, nullptr).sString; 55 | return sResult; 56 | } 57 | 58 | XBinary::FT XPDF::getFileType() 59 | { 60 | return FT_PDF; 61 | } 62 | 63 | XBinary::ENDIAN XPDF::getEndian() 64 | { 65 | return ENDIAN_UNKNOWN; 66 | } 67 | 68 | qint64 XPDF::getFileFormatSize(PDSTRUCT *pPdStruct) 69 | { 70 | return _calculateRawSize(pPdStruct); 71 | } 72 | 73 | QString XPDF::getFileFormatExt() 74 | { 75 | return QStringLiteral("pdf"); 76 | } 77 | 78 | QString XPDF::getFileFormatExtsString() 79 | { 80 | return QStringLiteral("PDF(pdf)"); 81 | } 82 | 83 | QString XPDF::getMIMEString() 84 | { 85 | return QStringLiteral("application/pdf"); 86 | } 87 | 88 | XBinary::MODE XPDF::getMode() 89 | { 90 | return MODE_UNKNOWN; // PDF does not have a specific mode like 16/32/64 91 | } 92 | 93 | QList XPDF::findObjects(qint64 nOffset, qint64 nSize, bool bDeepScan, PDSTRUCT *pPdStruct) 94 | { 95 | qint64 nFileSize = getSize(); 96 | if (nSize == -1) { 97 | nSize = nFileSize - nOffset; 98 | } 99 | 100 | QList listResult; 101 | 102 | qint64 nCurrentOffset = nOffset; 103 | qint64 nEndBound = nOffset + nSize; 104 | 105 | while (XBinary::isPdStructNotCanceled(pPdStruct) && (nCurrentOffset < nEndBound)) { 106 | // Read a small header token; 64 bytes is enough to capture " obj" and comments 107 | OS_STRING osString = _readPDFString(nCurrentOffset, 64, pPdStruct); 108 | 109 | if (_isObject(osString.sString)) { 110 | quint64 nID = getObjectID(osString.sString); 111 | // Bound the search for endobj to the current scan window 112 | qint64 nSearchStart = nCurrentOffset + osString.nSize; 113 | qint64 nSearchLen = qMax(0, nEndBound - nSearchStart); 114 | qint64 nEndObjOffset = (nSearchLen > 0) ? find_ansiString(nSearchStart, nSearchLen, "endobj", pPdStruct) : -1; 115 | 116 | if (nEndObjOffset != -1) { 117 | OS_STRING osEndObj = _readPDFString(nEndObjOffset, 32, pPdStruct); 118 | 119 | if (_isEndObject(osEndObj.sString)) { 120 | OBJECT objectRecord = {}; 121 | objectRecord.nOffset = nCurrentOffset; 122 | objectRecord.nID = nID; 123 | objectRecord.nSize = (nEndObjOffset + osEndObj.nSize) - nCurrentOffset; 124 | 125 | listResult.append(objectRecord); 126 | 127 | nCurrentOffset = nEndObjOffset + osEndObj.nSize; 128 | } else { 129 | break; 130 | } 131 | } else { 132 | break; 133 | } 134 | } else if (_isComment(osString.sString)) { 135 | // osString.nSize already includes trailing line ending; no extra skip needed 136 | nCurrentOffset += osString.nSize; 137 | } else { 138 | bool bContinue = false; 139 | if (bDeepScan) { 140 | qint64 nRemain = nEndBound - nCurrentOffset; 141 | nCurrentOffset = find_ansiString(nCurrentOffset, nRemain, " obj", pPdStruct); 142 | 143 | if (nCurrentOffset != -1) { 144 | while ((nCurrentOffset > 0) && XBinary::isPdStructNotCanceled(pPdStruct)) { 145 | quint8 nPrevChar = read_uint8(nCurrentOffset - 1); 146 | 147 | // If not number and not space 148 | if (!(((nPrevChar >= '0') && (nPrevChar <= '9')) || (nPrevChar == ' '))) { 149 | break; 150 | } 151 | 152 | --nCurrentOffset; 153 | } 154 | 155 | bContinue = true; 156 | } 157 | } 158 | 159 | if (!bContinue) { 160 | break; 161 | } 162 | } 163 | } 164 | 165 | return listResult; 166 | } 167 | 168 | qint32 XPDF::skipPDFString(qint64 *pnOffset, PDSTRUCT *pPdStruct) 169 | { 170 | qint32 nSize = _readPDFString(*pnOffset, 20, pPdStruct).nSize; 171 | *pnOffset += nSize; 172 | return nSize; 173 | } 174 | 175 | qint32 XPDF::skipPDFEnding(qint64 *pnOffset, PDSTRUCT *pPdStruct) 176 | { 177 | qint64 nStartOffset = *pnOffset; 178 | qint64 nFileSize = getSize(); 179 | qint64 nCurrentOffset = *pnOffset; 180 | 181 | while ((nCurrentOffset < nFileSize) && XBinary::isPdStructNotCanceled(pPdStruct)) { 182 | quint8 nChar = read_uint8(nCurrentOffset); 183 | if (nChar == 10) { 184 | ++nCurrentOffset; 185 | } else if (nChar == 13) { 186 | ++nCurrentOffset; 187 | if ((nCurrentOffset < nFileSize) && (read_uint8(nCurrentOffset) == 10)) { 188 | ++nCurrentOffset; 189 | } 190 | } else { 191 | break; 192 | } 193 | } 194 | 195 | *pnOffset = nCurrentOffset; 196 | return static_cast(nCurrentOffset - nStartOffset); 197 | } 198 | 199 | qint32 XPDF::skipPDFSpace(qint64 *pnOffset, PDSTRUCT *pPdStruct) 200 | { 201 | qint64 nStartOffset = *pnOffset; 202 | qint64 nFileSize = getSize(); 203 | qint64 nCurrentOffset = *pnOffset; 204 | 205 | while ((nCurrentOffset < nFileSize) && XBinary::isPdStructNotCanceled(pPdStruct)) { 206 | if (read_uint8(nCurrentOffset) == ' ') { 207 | ++nCurrentOffset; 208 | } else { 209 | break; 210 | } 211 | } 212 | 213 | *pnOffset = nCurrentOffset; 214 | return static_cast(nCurrentOffset - nStartOffset); 215 | } 216 | 217 | QList XPDF::getObjectsFromStartxref(const STARTHREF *pStartxref, PDSTRUCT *pPdStruct) 218 | { 219 | QList listResult; 220 | 221 | qint64 nTotalSize = getSize(); 222 | 223 | qint64 nCurrentOffset = pStartxref->nXrefOffset; 224 | 225 | OS_STRING osStringHref = _readPDFString(nCurrentOffset, 20, pPdStruct); 226 | 227 | if (_isXref(osStringHref.sString)) { 228 | nCurrentOffset += osStringHref.nSize; 229 | 230 | QMap mapObjects; 231 | 232 | while (XBinary::isPdStructNotCanceled(pPdStruct)) { 233 | OS_STRING osSection = _readPDFString(nCurrentOffset, 20, pPdStruct); 234 | 235 | if (!osSection.sString.isEmpty()) { 236 | quint64 nID = osSection.sString.section(" ", 0, 0).toULongLong(); 237 | quint64 nNumberOfObjects = osSection.sString.section(" ", 1, 1).toULongLong(); 238 | 239 | nCurrentOffset += osSection.nSize; 240 | 241 | if (nNumberOfObjects) { 242 | qint32 _nFreeIndex = XBinary::getFreeIndex(pPdStruct); 243 | XBinary::setPdStructInit(pPdStruct, _nFreeIndex, static_cast(nNumberOfObjects)); 244 | 245 | for (quint64 i = 0; (i < nNumberOfObjects) && XBinary::isPdStructNotCanceled(pPdStruct); ++i) { 246 | OS_STRING osObject = _readPDFString(nCurrentOffset, 20, pPdStruct); 247 | 248 | if (osObject.sString.section(" ", 2, 2) == "n") { 249 | qint64 nObjectOffset = osObject.sString.section(" ", 0, 0).toULongLong(); 250 | 251 | if ((nObjectOffset > 0) && (nObjectOffset < nTotalSize)) { 252 | mapObjects.insert(nObjectOffset, nID + i); 253 | } 254 | } 255 | 256 | nCurrentOffset += osObject.nSize; 257 | XBinary::setPdStructCurrentIncrement(pPdStruct, _nFreeIndex); 258 | } 259 | 260 | XBinary::setPdStructFinished(pPdStruct, _nFreeIndex); 261 | } else { 262 | break; 263 | } 264 | } else { 265 | break; 266 | } 267 | } 268 | 269 | QMapIterator iterator(mapObjects); 270 | while (iterator.hasNext() && XBinary::isPdStructNotCanceled(pPdStruct)) { 271 | iterator.next(); 272 | 273 | OBJECT object; 274 | object.nOffset = iterator.key(); 275 | object.nID = iterator.value(); 276 | object.nSize = 0; // Will be calculated later 277 | 278 | listResult.append(object); 279 | } 280 | 281 | qint32 nNumberOfObjects = listResult.count(); 282 | // Calculate sizes based on consecutive offsets 283 | for (qint32 i = 0; (i < nNumberOfObjects - 1) && XBinary::isPdStructNotCanceled(pPdStruct); ++i) { 284 | listResult[i].nSize = listResult[i + 1].nOffset - listResult[i].nOffset; 285 | } 286 | 287 | // Handle the last object's size using nXrefOffset 288 | if (!listResult.isEmpty()) { 289 | listResult.last().nSize = pStartxref->nXrefOffset - listResult.last().nOffset; 290 | } 291 | } 292 | 293 | return listResult; 294 | } 295 | 296 | XBinary::OS_STRING XPDF::_readPDFString(qint64 nOffset, qint64 nSize, PDSTRUCT *pPdStruct) 297 | { 298 | XBinary::OS_STRING result = {}; 299 | 300 | result.nOffset = nOffset; 301 | 302 | qint64 nFileSize = getSize(); 303 | if (nOffset < 0 || nOffset >= nFileSize) { 304 | return result; // Out of bounds 305 | } 306 | 307 | if (nSize == -1) { 308 | nSize = nFileSize - nOffset; 309 | } 310 | 311 | // Clamp read size to file end 312 | if (nOffset + nSize > nFileSize) { 313 | nSize = nFileSize - nOffset; 314 | } 315 | 316 | if (nSize <= 0) { 317 | return result; 318 | } 319 | 320 | qint64 nStartOffset = nOffset; 321 | qint64 nEndOffset = nOffset + nSize; 322 | for (; (nOffset < nEndOffset) && XBinary::isPdStructNotCanceled(pPdStruct); ++nOffset) { 323 | quint8 nChar = read_uint8(nOffset); 324 | // Stop on NUL, CR, or LF 325 | if ((nChar == 0) || (nChar == 13) || (nChar == 10)) { 326 | break; 327 | } 328 | result.sString.append(QLatin1Char(static_cast(nChar))); 329 | } 330 | 331 | // Bytes consumed for this string part 332 | result.nSize = nOffset - nStartOffset; 333 | 334 | // Include any trailing line ending in total size 335 | result.nSize += skipPDFEnding(&nOffset, pPdStruct); 336 | 337 | return result; 338 | } 339 | 340 | XBinary::OS_STRING XPDF::_readPDFStringPart_title(qint64 nOffset, qint64 nSize, PDSTRUCT *pPdStruct) 341 | { 342 | XBinary::OS_STRING result = {}; 343 | 344 | result.nOffset = nOffset; 345 | 346 | qint64 nFileSize = getSize(); 347 | if (nOffset < 0 || nOffset >= nFileSize) { 348 | return result; // Out of bounds 349 | } 350 | 351 | if (nSize == -1) { 352 | nSize = nFileSize - nOffset; 353 | } 354 | 355 | // Clamp to file size 356 | if (nOffset + nSize > nFileSize) { 357 | nSize = nFileSize - nOffset; 358 | } 359 | 360 | if (nSize <= 0) { 361 | return result; 362 | } 363 | 364 | qint64 nStartOffset = nOffset; 365 | qint64 nEndOffset = nOffset + nSize; 366 | for (; (nOffset < nEndOffset) && XBinary::isPdStructNotCanceled(pPdStruct); ++nOffset) { 367 | quint8 nChar = read_uint8(nOffset); 368 | // Stop on NUL, CR, LF, or '<' 369 | if ((nChar == 0) || (nChar == 13) || (nChar == 10) || (nChar == '<')) { 370 | break; 371 | } 372 | result.sString.append(QLatin1Char(static_cast(nChar))); 373 | } 374 | 375 | // bytes consumed for the title part 376 | result.nSize = nOffset - nStartOffset; 377 | 378 | // Include any trailing line ending in total size 379 | result.nSize += skipPDFEnding(&nOffset, pPdStruct); 380 | 381 | return result; 382 | } 383 | 384 | XBinary::OS_STRING XPDF::_readPDFStringPart(qint64 nOffset, PDSTRUCT *pPdStruct) 385 | { 386 | XBinary::OS_STRING result = {}; 387 | 388 | result.nOffset = nOffset; 389 | 390 | const qint64 nFileSize = getSize(); 391 | if (nOffset < 0 || nOffset >= nFileSize) { 392 | return result; // Out of bounds 393 | } 394 | 395 | qint64 nRemaining = nFileSize - nOffset; 396 | if (nRemaining <= 0) { 397 | return result; 398 | } 399 | 400 | const quint8 nChar = read_uint8(nOffset); 401 | 402 | if (nChar == '/') { 403 | result = _readPDFStringPart_const(nOffset, pPdStruct); 404 | } else if (nChar == '(') { 405 | result = _readPDFStringPart_str(nOffset, pPdStruct); 406 | } else if (nChar == '<') { 407 | if (nRemaining > 1) { 408 | if (read_uint8(nOffset + 1) == '<') { 409 | result.sString = "<<"; 410 | result.nSize = 2; 411 | } else { 412 | result = _readPDFStringPart_hex(nOffset, pPdStruct); 413 | } 414 | } 415 | } else if (nChar == '>') { 416 | if (nRemaining > 1) { 417 | if (read_uint8(nOffset + 1) == '>') { 418 | result.sString = ">>"; 419 | result.nSize = 2; 420 | } 421 | } 422 | } else if (nChar == '[') { 423 | result.sString = "["; 424 | result.nSize = 1; 425 | } else if (nChar == ']') { 426 | result.sString = "]"; 427 | result.nSize = 1; 428 | } else { 429 | result = _readPDFStringPart_val(nOffset, pPdStruct); 430 | } 431 | 432 | // Post-consume trailing spaces and line endings 433 | nOffset += result.nSize; 434 | result.nSize += skipPDFSpace(&nOffset, pPdStruct); 435 | result.nSize += skipPDFEnding(&nOffset, pPdStruct); 436 | 437 | return result; 438 | } 439 | 440 | XBinary::OS_STRING XPDF::_readPDFStringPart_const(qint64 nOffset, PDSTRUCT *pPdStruct) 441 | { 442 | XBinary::OS_STRING result = {}; 443 | 444 | result.nOffset = nOffset; 445 | 446 | const qint64 nFileSize = getSize(); 447 | if (nOffset < 0 || nOffset >= nFileSize) { 448 | return result; // Out of bounds 449 | } 450 | 451 | const qint64 nEndOffset = nFileSize; // read until file end or stop char 452 | bool bIsFirst = true; 453 | for (; (nOffset < nEndOffset) && XBinary::isPdStructNotCanceled(pPdStruct); ++nOffset) { 454 | const quint8 nChar = read_uint8(nOffset); 455 | 456 | // Stop on control/terminators or structural delimiters 457 | if ((nChar == 0) || (nChar == 10) || (nChar == 13) || (nChar == '[') || (nChar == ']') || (nChar == '<') || (nChar == '>') || (nChar == ' ') || (nChar == '(')) { 458 | break; 459 | } 460 | 461 | // Subsequent '/' starts a new token; include only the very first '/' 462 | if (!bIsFirst && (nChar == '/')) { 463 | break; 464 | } 465 | 466 | result.sString.append(QLatin1Char(static_cast(nChar))); 467 | result.nSize++; 468 | bIsFirst = false; 469 | } 470 | 471 | return result; 472 | } 473 | 474 | XBinary::OS_STRING XPDF::_readPDFStringPart_str(qint64 nOffset, PDSTRUCT *pPdStruct) 475 | { 476 | XBinary::OS_STRING result = {}; 477 | 478 | result.nOffset = nOffset; 479 | 480 | const qint64 nFileSize = getSize(); 481 | if (nOffset < 0 || nOffset >= nFileSize) { 482 | return result; // Out of bounds 483 | } 484 | 485 | qint64 nRemaining = nFileSize - nOffset; 486 | if (nRemaining <= 0) { 487 | return result; 488 | } 489 | 490 | bool bStart = false; 491 | bool bEnd = false; 492 | bool bUnicode = false; 493 | bool bBSlash = false; 494 | 495 | // Cursor-based loop 496 | for (; (nRemaining > 0) && XBinary::isPdStructNotCanceled(pPdStruct);) { 497 | if (!bUnicode) { 498 | const quint8 nChar = read_uint8(nOffset); 499 | 500 | // Stop on NUL, LF, CR 501 | if ((nChar == 0) || (nChar == 10) || (nChar == 13)) { 502 | break; 503 | } 504 | 505 | if (!bStart) { 506 | if (nChar == '(') { 507 | bStart = true; 508 | // Check UTF-16BE BOM after '(' 509 | if (nRemaining >= 3) { 510 | if ((read_uint8(nOffset + 1) == 0xFE) && (read_uint8(nOffset + 2) == 0xFF)) { 511 | bUnicode = true; 512 | result.nSize += 2; // count BOM bytes in size 513 | nOffset += 2; 514 | nRemaining -= 2; 515 | } 516 | } 517 | result.sString.append('('); 518 | } else { 519 | // If first char isn't '(', treat as plain char (legacy behavior) 520 | if (bBSlash) { 521 | bBSlash = false; 522 | } 523 | result.sString.append(QLatin1Char(static_cast(nChar))); 524 | } 525 | } else if ((nChar == ')') && (!bBSlash)) { 526 | result.sString.append(')'); 527 | bEnd = true; 528 | } else if (nChar == '\\') { 529 | bBSlash = true; 530 | } else { 531 | if (bBSlash) { 532 | bBSlash = false; 533 | } 534 | result.sString.append(QLatin1Char(static_cast(nChar))); 535 | } 536 | ++result.nSize; 537 | ++nOffset; 538 | --nRemaining; 539 | } else if (nRemaining >= 2) { 540 | const quint16 nWord = read_uint16(nOffset, true); 541 | 542 | if (((nWord >> 8) == ')') && (!bBSlash)) { 543 | result.sString.append(')'); 544 | ++result.nSize; // only one byte of ')' 545 | bEnd = true; 546 | } else if (nWord == '\\') { 547 | bBSlash = true; 548 | nOffset += 2; 549 | nRemaining -= 2; 550 | result.nSize += 2; 551 | continue; 552 | } else if (bBSlash && (nWord == 0x6e29)) { // 'n' ')' 553 | bBSlash = false; 554 | result.sString.append(')'); 555 | ++result.nSize; 556 | bEnd = true; 557 | } else { 558 | if (bBSlash) { 559 | bBSlash = false; 560 | } 561 | result.sString.append(QChar(nWord)); 562 | nOffset += 2; 563 | nRemaining -= 2; 564 | result.nSize += 2; 565 | continue; 566 | } 567 | 568 | // advance by one byte for the cases above where we consumed a single char 569 | ++nOffset; 570 | --nRemaining; 571 | } else { 572 | break; 573 | } 574 | 575 | if (bStart && bEnd) { 576 | break; 577 | } 578 | } 579 | 580 | return result; 581 | } 582 | 583 | XBinary::OS_STRING XPDF::_readPDFStringPart_val(qint64 nOffset, PDSTRUCT *pPdStruct) 584 | { 585 | XBinary::OS_STRING result = {}; 586 | 587 | result.nOffset = nOffset; 588 | 589 | const qint64 nFileSize = getSize(); 590 | if (nOffset < 0 || nOffset >= nFileSize) { 591 | return result; // Out of bounds 592 | } 593 | 594 | qint64 nRemaining = nFileSize - nOffset; 595 | if (nRemaining <= 0) { 596 | return result; 597 | } 598 | 599 | bool bSpace = false; 600 | 601 | for (; (nRemaining > 0) && XBinary::isPdStructNotCanceled(pPdStruct); ++nOffset, --nRemaining) { 602 | const quint8 nChar = read_uint8(nOffset); 603 | 604 | if ((nChar == 0) || (nChar == 10) || (nChar == 13) || (nChar == '[') || (nChar == ']') || (nChar == '<') || (nChar == '>') || (nChar == '/')) { 605 | break; 606 | } 607 | 608 | ++result.nSize; 609 | 610 | if (nChar == ' ') { 611 | bSpace = true; 612 | break; 613 | } 614 | 615 | result.sString.append(QLatin1Char(static_cast(nChar))); 616 | } 617 | 618 | if (bSpace) { 619 | if (nRemaining >= 3) { 620 | const QString sSuffix = read_ansiString(nOffset + result.nSize, 3); 621 | if (sSuffix == "0 R") { 622 | result.sString.append(" " + sSuffix); 623 | result.nSize += 3; 624 | } 625 | } 626 | } 627 | 628 | return result; 629 | } 630 | 631 | XBinary::OS_STRING XPDF::_readPDFStringPart_hex(qint64 nOffset, PDSTRUCT *pPdStruct) 632 | { 633 | XBinary::OS_STRING result = {}; 634 | 635 | result.nOffset = nOffset; 636 | 637 | const qint64 nFileSize = getSize(); 638 | if (nOffset < 0 || nOffset >= nFileSize) { 639 | return result; // Out of bounds 640 | } 641 | 642 | qint64 nRemaining = nFileSize - nOffset; 643 | if (nRemaining <= 0) { 644 | return result; 645 | } 646 | 647 | // First byte must be '<' 648 | const quint8 nFirst = read_uint8(nOffset); 649 | if (nFirst != '<') { 650 | return result; 651 | } 652 | 653 | // Cursor-based copy until '>' inclusive 654 | for (; (nRemaining > 0) && XBinary::isPdStructNotCanceled(pPdStruct); ++nOffset, --nRemaining) { 655 | const quint8 nChar = read_uint8(nOffset); 656 | result.sString.append(QLatin1Char(static_cast(nChar))); 657 | ++result.nSize; 658 | if (nChar == '>') { 659 | break; 660 | } 661 | } 662 | 663 | return result; 664 | } 665 | 666 | QList XPDF::getMapModesList() 667 | { 668 | QList listResult; 669 | 670 | listResult.append(MAPMODE_DATA); 671 | listResult.append(MAPMODE_OBJECTS); 672 | listResult.append(MAPMODE_STREAMS); 673 | 674 | return listResult; 675 | } 676 | 677 | XBinary::_MEMORY_MAP XPDF::getMemoryMap(MAPMODE mapMode, PDSTRUCT *pPdStruct) 678 | { 679 | XBinary::_MEMORY_MAP result = {}; 680 | 681 | if (mapMode == MAPMODE_UNKNOWN) { 682 | mapMode = MAPMODE_DATA; // Default mode 683 | } 684 | 685 | if (mapMode == MAPMODE_OBJECTS) { 686 | result = _getMemoryMap(FILEPART_SIGNATURE | FILEPART_OBJECT | FILEPART_FOOTER | FILEPART_TABLE | FILEPART_OVERLAY, pPdStruct); 687 | } else if (mapMode == MAPMODE_STREAMS) { 688 | result = _getMemoryMap(FILEPART_STREAM, pPdStruct); 689 | } else if (mapMode == MAPMODE_DATA) { 690 | result = _getMemoryMap(FILEPART_DATA | FILEPART_OVERLAY, pPdStruct); 691 | } 692 | 693 | return result; 694 | } 695 | 696 | QList XPDF::findStartxrefs(qint64 nOffset, PDSTRUCT *pPdStruct) 697 | { 698 | QList listResult; 699 | 700 | const qint64 nFileSize = getSize(); 701 | 702 | while (XBinary::isPdStructNotCanceled(pPdStruct)) { 703 | qint64 nStartXref = find_signature(nOffset, -1, "'startxref'", nullptr, pPdStruct); // \n \r 704 | if (nStartXref == -1) { 705 | break; 706 | } 707 | 708 | qint64 nCurrent = nStartXref; 709 | 710 | OS_STRING osStartXref = _readPDFString(nCurrent, 20, pPdStruct); 711 | nCurrent += osStartXref.nSize; 712 | 713 | OS_STRING osOffset = _readPDFString(nCurrent, 20, pPdStruct); 714 | qint64 nTargetOffset = osOffset.sString.toLongLong(); 715 | 716 | OS_STRING osHref = _readPDFString(nTargetOffset, 20, pPdStruct); 717 | bool bIsXref = _isXref(osHref.sString); 718 | bool bIsObject = _isObject(osHref.sString); 719 | 720 | if ((bIsXref || bIsObject) && (nTargetOffset < nCurrent)) { 721 | nCurrent += osOffset.nSize; 722 | 723 | OS_STRING osEnd = _readPDFString(nCurrent, 20, pPdStruct); 724 | QString sFooterHead = osEnd.sString; 725 | #if QT_VERSION >= QT_VERSION_CHECK(5, 7, 0) 726 | sFooterHead.resize(5, QChar(' ')); 727 | #else 728 | sFooterHead.resize(5); 729 | for (int i = osEnd.sString.length(); i < 5; i++) { 730 | sFooterHead[i] = QChar(' '); 731 | } 732 | #endif 733 | 734 | if (sFooterHead == QStringLiteral("%%EOF")) { 735 | nCurrent += 5; 736 | 737 | // Skip optional CR and LF, bounds-checked 738 | if ((nCurrent < nFileSize) && (read_uint8(nCurrent) == 13)) { 739 | ++nCurrent; 740 | } 741 | if ((nCurrent < nFileSize) && (read_uint8(nCurrent) == 10)) { 742 | ++nCurrent; 743 | } 744 | 745 | STARTHREF record = {}; 746 | record.nXrefOffset = nTargetOffset; 747 | record.nFooterOffset = nStartXref; 748 | record.nFooterSize = nCurrent - nStartXref; 749 | record.bIsObject = bIsObject; 750 | record.bIsXref = bIsXref; 751 | listResult.append(record); 752 | 753 | if (osEnd.sString.size() != 5) { 754 | break; 755 | } 756 | 757 | OS_STRING osAppend = _readPDFString(nCurrent, 20, pPdStruct); 758 | if ((!_isObject(osAppend.sString)) && (!_isComment(osAppend.sString)) && (!_isXref(osAppend.sString))) { 759 | break; // No append 760 | } 761 | } 762 | } 763 | 764 | nOffset = nStartXref + 10; // Get the last 765 | } 766 | 767 | return listResult; 768 | } 769 | 770 | XPDF::XPART XPDF::handleXpart(qint64 nOffset, qint32 nID, qint32 nPartLimit, PDSTRUCT *pPdStruct) 771 | { 772 | XPART result = {}; 773 | result.nOffset = nOffset; 774 | result.nID = nID; 775 | 776 | QString sLength; 777 | bool bLength = false; 778 | 779 | while (XBinary::isPdStructNotCanceled(pPdStruct)) { 780 | bool bStop = false; 781 | OS_STRING osString = _readPDFStringPart_title(nOffset, 20, pPdStruct); 782 | 783 | if (result.nID == 0) { 784 | result.nID = getObjectID(osString.sString); 785 | } 786 | 787 | nOffset += osString.nSize; 788 | 789 | if (_isObject(osString.sString)) { 790 | qint32 nObj = 0; 791 | qint32 nCol = 0; 792 | qint32 nPartCount = 0; 793 | while (XBinary::isPdStructNotCanceled(pPdStruct)) { 794 | OS_STRING osStringPart = _readPDFStringPart(nOffset, pPdStruct); 795 | 796 | if ((nPartCount < nPartLimit) || (nPartLimit == -1)) { 797 | result.listParts.append(osStringPart.sString); 798 | ++nPartCount; 799 | } else { 800 | bStop = true; 801 | break; 802 | } 803 | 804 | nOffset += osStringPart.nSize; 805 | 806 | if (osStringPart.sString.isEmpty()) { 807 | break; 808 | } 809 | 810 | if (osStringPart.sString == QLatin1String("<<")) { 811 | ++nObj; 812 | } else if (osStringPart.sString == QLatin1String(">>")) { 813 | --nObj; 814 | } else if (osStringPart.sString == QLatin1String("[")) { 815 | ++nCol; 816 | } else if (osStringPart.sString == QLatin1String("]")) { 817 | --nCol; 818 | } else if (osStringPart.sString == QLatin1String("/Length")) { 819 | bLength = true; 820 | } else if (bLength) { 821 | bLength = false; 822 | sLength = osStringPart.sString; 823 | } 824 | 825 | if ((nObj == 0) && (nCol == 0)) { 826 | break; 827 | } 828 | } 829 | 830 | if (bStop) { 831 | break; 832 | } 833 | } else if (osString.sString == QLatin1String("stream")) { 834 | STREAM stream = {}; 835 | stream.nOffset = nOffset; 836 | if (sLength.toInt()) { 837 | stream.nSize = sLength.toInt(); 838 | } else if (sLength.section(" ", 2, 2) == QLatin1String("R")) { 839 | QString sPattern = sLength; 840 | sPattern.replace("R", "obj"); 841 | qint64 nObjectOffset = find_ansiString(nOffset, -1, sPattern, pPdStruct); 842 | 843 | if (nObjectOffset != -1) { 844 | qint64 nTmp = nObjectOffset; 845 | skipPDFString(&nTmp, pPdStruct); 846 | OS_STRING osLen = _readPDFStringPart_val(nTmp, pPdStruct); 847 | 848 | if (osLen.sString.toInt()) { 849 | stream.nSize = osLen.sString.toInt(); 850 | } else { 851 | break; 852 | } 853 | } 854 | } else { 855 | break; 856 | } 857 | 858 | if (stream.nSize) { 859 | nOffset += stream.nSize; 860 | skipPDFEnding(&nOffset, pPdStruct); 861 | result.listStreams.append(stream); 862 | } 863 | } else if (osString.sString == QLatin1String("endstream")) { 864 | // TODO 865 | } else if (_isEndObject(osString.sString)) { 866 | break; 867 | } else if (osString.sString.isEmpty()) { 868 | break; 869 | } 870 | } 871 | 872 | result.nSize = nOffset - result.nOffset; 873 | 874 | return result; 875 | } 876 | 877 | bool XPDF::_isObject(const QString &sString) 878 | { 879 | // Fast check: last token equals "obj" 880 | qint32 nI = sString.size(); 881 | while (nI > 0 && sString.at(nI - 1) == QChar(' ')) --nI; // trim right spaces 882 | const qint32 nTokenLen = 3; // "obj" 883 | if (nI < nTokenLen) return false; 884 | // Ensure boundary before token is start or space 885 | const qint32 nStart = nI - nTokenLen; 886 | if (!((nStart == 0) || (sString.at(nStart - 1) == QChar(' ')))) return false; 887 | // Compare without allocating 888 | return (sString.at(nStart) == QChar('o') && sString.at(nStart + 1) == QChar('b') && sString.at(nStart + 2) == QChar('j')); 889 | } 890 | 891 | bool XPDF::_isString(const QString &sString) 892 | { 893 | bool bResult = false; 894 | 895 | qint32 nSize = sString.size(); 896 | if (nSize >= 2) { 897 | if ((sString.at(0) == QChar('(')) && (sString.at(nSize - 1) == QChar(')'))) { 898 | bResult = true; 899 | } 900 | } 901 | 902 | return bResult; 903 | } 904 | 905 | bool XPDF::_isHex(const QString &sString) 906 | { 907 | bool bResult = false; 908 | 909 | qint32 nSize = sString.size(); 910 | if (nSize >= 2) { 911 | if ((sString.at(0) == QChar('<')) && (sString.at(nSize - 1) == QChar('>'))) { 912 | if (sString.at(0) != sString.at(1)) { 913 | bResult = true; 914 | } 915 | } 916 | } 917 | 918 | return bResult; 919 | } 920 | 921 | bool XPDF::_isDateTime(const QString &sString) 922 | { 923 | bool bResult = false; 924 | 925 | qint32 nSize = sString.size(); 926 | if (nSize >= 18) { 927 | if (sString.startsWith("(D:") && (sString.at(nSize - 1) == QChar(')'))) { 928 | bResult = true; 929 | } 930 | } 931 | 932 | return bResult; 933 | } 934 | 935 | bool XPDF::_isEndObject(const QString &sString) 936 | { 937 | // Compare against "endobj" ignoring surrounding spaces without allocating 938 | qint32 nLeft = 0; 939 | qint32 nRight = sString.size(); 940 | while (nLeft < nRight && sString.at(nLeft) == QChar(' ')) ++nLeft; 941 | while (nRight > nLeft && sString.at(nRight - 1) == QChar(' ')) --nRight; 942 | if (nRight - nLeft != 6) return false; 943 | return (sString.at(nLeft) == QChar('e') && sString.at(nLeft + 1) == QChar('n') && sString.at(nLeft + 2) == QChar('d') && sString.at(nLeft + 3) == QChar('o') && 944 | sString.at(nLeft + 4) == QChar('b') && sString.at(nLeft + 5) == QChar('j')); 945 | } 946 | 947 | bool XPDF::_isComment(const QString &sString) 948 | { 949 | bool bResult = false; 950 | 951 | if (!sString.isEmpty()) { 952 | bResult = (sString.at(0) == QChar('%')); 953 | } 954 | 955 | return bResult; 956 | } 957 | 958 | bool XPDF::_isXref(const QString &sString) 959 | { 960 | bool bResult = false; 961 | 962 | if (!sString.isEmpty()) { 963 | // Fast path: check prefix "xref" and boundary 964 | const qint32 nTLen = 4; 965 | if (sString.size() >= nTLen && sString.at(0) == QChar('x') && sString.at(1) == QChar('r') && sString.at(2) == QChar('e') && sString.at(3) == QChar('f')) { 966 | bResult = (sString.size() == nTLen) || (sString.at(4) == QChar(' ')); 967 | } 968 | } 969 | 970 | return bResult; 971 | } 972 | 973 | QString XPDF::_getCommentString(const QString &sString) 974 | { 975 | QString sResult; 976 | 977 | if (!sString.isEmpty() && (sString.at(0) == QChar('%'))) { 978 | sResult = sString.mid(1); 979 | } 980 | 981 | return sResult; 982 | } 983 | 984 | QString XPDF::_getString(const QString &sString) 985 | { 986 | QString sResult; 987 | 988 | qint32 nSize = sString.size(); 989 | if (nSize >= 2) { 990 | sResult = sString.mid(1, nSize - 2); 991 | } 992 | 993 | return sResult; 994 | } 995 | 996 | QString XPDF::_getHex(const QString &sString) 997 | { 998 | QString sResult; 999 | 1000 | qint32 nSize = sString.size(); 1001 | if (nSize >= 2) { 1002 | sResult = sString.mid(1, nSize - 2); 1003 | } 1004 | 1005 | return sResult; 1006 | } 1007 | 1008 | QDateTime XPDF::_getDateTime(const QString &sString) 1009 | { 1010 | QDateTime result; 1011 | 1012 | QString sDate = sString.section(":", 1, -1); 1013 | sDate = sDate.section(")", 0, 0); 1014 | sDate.remove(QChar('\'')); 1015 | sDate.replace(QChar('Z'), QChar('+')); 1016 | // sDate.resize(14, QChar('0')); 1017 | 1018 | result = QDateTime::fromString(sDate, "yyyyMMddhhmmsstt"); 1019 | 1020 | return result; 1021 | } 1022 | 1023 | qint32 XPDF::getObjectID(const QString &sString) 1024 | { 1025 | // Parse leading integer (until first space) without allocating 1026 | qint64 n = 0; 1027 | bool bNeg = false; 1028 | qint32 nI = 0; 1029 | const qint32 nLen = sString.size(); 1030 | if (nI < nLen && sString.at(nI) == QChar('-')) { 1031 | bNeg = true; 1032 | ++nI; 1033 | } 1034 | for (; nI < nLen; ++nI) { 1035 | const QChar c = sString.at(nI); 1036 | if (c < QChar('0') || c > QChar('9')) break; 1037 | n = n * 10 + (c.unicode() - '0'); 1038 | } 1039 | if (bNeg) n = -n; 1040 | return static_cast(n); 1041 | } 1042 | 1043 | XBinary::XVARIANT XPDF::getFirstStringValueByKey(QList *pListStrings, const QString &sKey, PDSTRUCT *pPdStruct) 1044 | { 1045 | XBinary::XVARIANT varResult; 1046 | 1047 | const qint32 nNumberOfParts = pListStrings->count(); 1048 | 1049 | for (qint32 j = 0; (j + 1 < nNumberOfParts) && XBinary::isPdStructNotCanceled(pPdStruct); ++j) { 1050 | const QString &sCurrentKey = pListStrings->at(j); 1051 | 1052 | if (sCurrentKey == sKey) { 1053 | const QString &sValue = pListStrings->at(j + 1); 1054 | XVARIANT varValue; 1055 | 1056 | // Preserve original "0" behavior (not treated as number) 1057 | qlonglong ll = sValue.toLongLong(); 1058 | if (ll) { 1059 | varValue.varType = XBinary::VT_INT64; 1060 | varValue.var = ll; 1061 | } else if (_isDateTime(sValue)) { 1062 | varValue.varType = XBinary::VT_DATETIME; 1063 | varValue.var = _getDateTime(sValue); 1064 | } else if (_isString(sValue)) { 1065 | varValue.varType = XBinary::VT_STRING; 1066 | varValue.var = _getString(sValue); 1067 | } else if (_isHex(sValue)) { 1068 | varValue.varType = XBinary::VT_HEX; 1069 | varValue.var = _getHex(sValue); 1070 | } else { 1071 | varValue.varType = XBinary::VT_VALUE; 1072 | varValue.var = sValue; 1073 | } 1074 | 1075 | if (!varValue.var.isNull()) { 1076 | varResult = varValue; 1077 | } 1078 | } 1079 | } 1080 | 1081 | return varResult; 1082 | } 1083 | 1084 | QList XPDF::getParts(qint32 nPartLimit, PDSTRUCT *pPdStruct) 1085 | { 1086 | QList listResult; 1087 | 1088 | const QList listStrartHrefs = findStartxrefs(0, pPdStruct); 1089 | 1090 | qint32 nNumberOfHrefs = listStrartHrefs.count(); 1091 | 1092 | QList listObject; 1093 | if (nNumberOfHrefs) { 1094 | for (qint32 i = 0; (i < nNumberOfHrefs) && XBinary::isPdStructNotCanceled(pPdStruct); ++i) { 1095 | const STARTHREF &startxref = listStrartHrefs.at(i); 1096 | 1097 | if (startxref.bIsXref) { 1098 | listObject.append(getObjectsFromStartxref(&startxref, pPdStruct)); 1099 | } else if (startxref.bIsObject) { 1100 | // listObject = findObjects(startxref.nXrefOffset, startxref.nFooterOffset - startxref.nXrefOffset, true, pPdStruct); 1101 | listObject = findObjects(0, startxref.nFooterOffset, true, pPdStruct); 1102 | } 1103 | } 1104 | } else { 1105 | listObject = findObjects(0, -1, false, pPdStruct); 1106 | } 1107 | 1108 | qint32 nNumberOfObjects = listObject.count(); 1109 | 1110 | if (nNumberOfObjects) { 1111 | const qint32 _nFreeIndex = XBinary::getFreeIndex(pPdStruct); 1112 | XBinary::setPdStructInit(pPdStruct, _nFreeIndex, nNumberOfObjects); 1113 | 1114 | for (qint32 i = 0; (i < nNumberOfObjects) && XBinary::isPdStructNotCanceled(pPdStruct); ++i) { 1115 | const OBJECT &record = listObject.at(i); 1116 | 1117 | XPART xpart = handleXpart(record.nOffset, record.nID, nPartLimit, pPdStruct); 1118 | 1119 | listResult.append(xpart); 1120 | 1121 | XBinary::setPdStructCurrentIncrement(pPdStruct, _nFreeIndex); 1122 | } 1123 | 1124 | XBinary::setPdStructFinished(pPdStruct, _nFreeIndex); 1125 | } 1126 | 1127 | return listResult; 1128 | } 1129 | 1130 | QList XPDF::getValuesByKey(QList *pListObjects, const QString &sKey, PDSTRUCT *pPdStruct) 1131 | { 1132 | QList listResult; 1133 | QSet stVars; 1134 | 1135 | qint32 nNumberOfRecords = pListObjects->count(); 1136 | 1137 | const qint32 _nFreeIndex = XBinary::getFreeIndex(pPdStruct); 1138 | XBinary::setPdStructInit(pPdStruct, _nFreeIndex, nNumberOfRecords); 1139 | 1140 | for (qint32 i = 0; (i < nNumberOfRecords) && XBinary::isPdStructNotCanceled(pPdStruct); ++i) { 1141 | const XPART &record = pListObjects->at(i); 1142 | 1143 | qint32 nNumberOfParts = record.listParts.count(); 1144 | 1145 | for (qint32 j = 0; (j + 1 < nNumberOfParts) && XBinary::isPdStructNotCanceled(pPdStruct); ++j) { 1146 | const QString &sPart = record.listParts.at(j); 1147 | 1148 | if (sPart == sKey) { 1149 | const QString &sValue = record.listParts.at(j + 1); 1150 | XVARIANT varValue; 1151 | 1152 | qlonglong ll = sValue.toLongLong(); 1153 | if (ll) { 1154 | varValue.varType = XBinary::VT_INT64; 1155 | varValue.var = ll; 1156 | } else if (_isDateTime(sValue)) { 1157 | varValue.varType = XBinary::VT_DATETIME; 1158 | varValue.var = _getDateTime(sValue); 1159 | } else if (_isString(sValue)) { 1160 | varValue.varType = XBinary::VT_STRING; 1161 | varValue.var = _getString(sValue); 1162 | } else if (_isHex(sValue)) { 1163 | varValue.varType = XBinary::VT_HEX; 1164 | varValue.var = _getHex(sValue); 1165 | } else { 1166 | varValue.varType = XBinary::VT_VALUE; 1167 | varValue.var = sValue; 1168 | } 1169 | 1170 | if (!varValue.var.isNull()) { 1171 | const QString key = varValue.var.toString(); 1172 | if (!stVars.contains(key)) { 1173 | listResult.append(varValue); 1174 | stVars.insert(key); 1175 | } 1176 | } 1177 | } 1178 | } 1179 | 1180 | XBinary::setPdStructCurrentIncrement(pPdStruct, _nFreeIndex); 1181 | } 1182 | 1183 | XBinary::setPdStructFinished(pPdStruct, _nFreeIndex); 1184 | 1185 | return listResult; 1186 | } 1187 | 1188 | qint32 XPDF::getType() 1189 | { 1190 | return TYPE_DOCUMENT; 1191 | } 1192 | 1193 | QString XPDF::typeIdToString(qint32 nType) 1194 | { 1195 | QString sResult = tr("Unknown"); 1196 | 1197 | switch (nType) { 1198 | case TYPE_UNKNOWN: sResult = tr("Unknown"); break; 1199 | case TYPE_DOCUMENT: sResult = tr("Document"); break; 1200 | } 1201 | 1202 | return sResult; 1203 | } 1204 | 1205 | QString XPDF::getHeaderCommentAsHex(PDSTRUCT *pPdStruct) 1206 | { 1207 | QString sResult; 1208 | 1209 | const qint64 nFileSize = getSize(); 1210 | qint64 nCurrentOffset = 0; 1211 | 1212 | // Read header line ("%PDF-...") and advance just past its line ending 1213 | OS_STRING osString = _readPDFString(nCurrentOffset, 100, pPdStruct); 1214 | nCurrentOffset += osString.nSize; // _readPDFString already counted trailing EOL 1215 | 1216 | // Ensure we are within file bounds and next line starts with '%' 1217 | if ((nCurrentOffset < nFileSize) && (read_uint8(nCurrentOffset) == '%')) { 1218 | ++nCurrentOffset; 1219 | 1220 | QByteArray baData; 1221 | baData.reserve(40); 1222 | 1223 | const qint64 nMaxRead = qMin(40, nFileSize - nCurrentOffset); 1224 | for (qint32 i = 0; (i < nMaxRead) && XBinary::isPdStructNotCanceled(pPdStruct); ++i) { 1225 | quint8 nChar = read_uint8(nCurrentOffset + i); 1226 | 1227 | if ((nChar == 13) || (nChar == 10) || (nChar == 0)) { 1228 | break; 1229 | } 1230 | 1231 | baData.append(static_cast(nChar)); 1232 | } 1233 | 1234 | sResult = baData.toHex(); 1235 | } 1236 | 1237 | return sResult; 1238 | } 1239 | 1240 | QString XPDF::getFilters(PDSTRUCT *pPdStruct) 1241 | { 1242 | QString sResult; 1243 | 1244 | QList listParts = getParts(100, pPdStruct); // TODO limit 1245 | const QList listValues = getValuesByKey(&listParts, QLatin1String("/Filter"), pPdStruct); 1246 | 1247 | QStringList filters; 1248 | 1249 | const int nNumberOfValues = listValues.count(); 1250 | for (int i = 0; (i < nNumberOfValues) && XBinary::isPdStructNotCanceled(pPdStruct); ++i) { 1251 | const QString v = listValues.at(i).var.toString(); 1252 | if (!v.isEmpty()) filters.append(v); 1253 | } 1254 | 1255 | sResult = filters.join(QLatin1String(", ")); 1256 | return sResult; 1257 | } 1258 | 1259 | QString XPDF::getInfo(PDSTRUCT *pPdStruct) 1260 | { 1261 | QString sResult; 1262 | 1263 | sResult = getFilters(pPdStruct); 1264 | 1265 | return sResult; 1266 | } 1267 | 1268 | QList XPDF::getFileParts(quint32 nFileParts, qint32 nLimit, PDSTRUCT *pPdStruct) 1269 | { 1270 | // TODO limit 1271 | 1272 | QList listResult; 1273 | 1274 | qint64 nMaxOffset = 0; 1275 | QList listObject; 1276 | 1277 | const QList listStrartHrefs = findStartxrefs(0, pPdStruct); 1278 | const qint64 totalSize = getSize(); 1279 | 1280 | qint32 nNumberOfFrefs = listStrartHrefs.count(); 1281 | 1282 | if (nNumberOfFrefs) { 1283 | if (nFileParts & FILEPART_SIGNATURE) { 1284 | const OS_STRING osHeader = _readPDFString(0, 20, pPdStruct); 1285 | 1286 | FPART record = {}; 1287 | 1288 | record.filePart = FILEPART_SIGNATURE; 1289 | record.nFileOffset = 0; 1290 | record.nFileSize = osHeader.nSize; 1291 | record.nVirtualAddress = -1; 1292 | record.sName = tr("Signature"); 1293 | 1294 | listResult.append(record); 1295 | } 1296 | 1297 | for (int j = 0; (j < nNumberOfFrefs) && XBinary::isPdStructNotCanceled(pPdStruct); ++j) { 1298 | const STARTHREF &startxref = listStrartHrefs.at(j); 1299 | 1300 | if (startxref.bIsXref) { 1301 | if ((nFileParts & FILEPART_OBJECT) || (nFileParts & FILEPART_STREAM)) { 1302 | listObject.append(getObjectsFromStartxref(&startxref, pPdStruct)); 1303 | } 1304 | 1305 | if (nFileParts & FILEPART_TABLE) { 1306 | FPART record = {}; 1307 | 1308 | record.filePart = FILEPART_DATA; 1309 | record.nFileOffset = startxref.nXrefOffset; 1310 | record.nFileSize = startxref.nFooterOffset - startxref.nXrefOffset; 1311 | record.nVirtualAddress = -1; 1312 | record.sName = QStringLiteral("xref"); 1313 | 1314 | listResult.append(record); 1315 | } 1316 | } else if (startxref.bIsObject) { 1317 | if ((nFileParts & FILEPART_OBJECT) || (nFileParts & FILEPART_STREAM)) { 1318 | listObject.append(findObjects(0, startxref.nFooterOffset, true, pPdStruct)); 1319 | } 1320 | } 1321 | 1322 | // if (startxref.nFooterOffset - nCurrentOffset > 0) { 1323 | // // Trailer 1324 | // } 1325 | 1326 | if (nFileParts & FILEPART_FOOTER) { 1327 | FPART record = {}; 1328 | 1329 | record.filePart = FILEPART_FOOTER; 1330 | record.nFileOffset = startxref.nFooterOffset; 1331 | record.nFileSize = startxref.nFooterSize; 1332 | record.nVirtualAddress = -1; 1333 | record.sName = tr("Footer"); 1334 | 1335 | listResult.append(record); 1336 | } 1337 | } 1338 | 1339 | nMaxOffset = listStrartHrefs.at(nNumberOfFrefs - 1).nFooterOffset + listStrartHrefs.at(nNumberOfFrefs - 1).nFooterSize; 1340 | } else { 1341 | // File damaged; 1342 | listObject.append(findObjects(0, -1, false, pPdStruct)); 1343 | 1344 | qint32 nNumberOfObjects = listObject.count(); 1345 | 1346 | if (nNumberOfObjects) { 1347 | const OS_STRING osHeader = _readPDFString(0, 20, pPdStruct); 1348 | 1349 | if (nFileParts & FILEPART_SIGNATURE) { 1350 | FPART record = {}; 1351 | 1352 | record.filePart = FILEPART_SIGNATURE; 1353 | record.nFileOffset = 0; 1354 | record.nFileSize = osHeader.nSize; 1355 | record.nVirtualAddress = -1; 1356 | record.sName = tr("Header"); 1357 | 1358 | listResult.append(record); 1359 | } 1360 | 1361 | nMaxOffset = osHeader.nSize; 1362 | } 1363 | 1364 | for (qint32 i = 0; (i < nNumberOfObjects) && XBinary::isPdStructNotCanceled(pPdStruct); ++i) { 1365 | const OBJECT &o = listObject.at(i); 1366 | const qint64 end = o.nOffset + o.nSize; 1367 | if (end > nMaxOffset) nMaxOffset = end; 1368 | } 1369 | } 1370 | 1371 | if (nFileParts & FILEPART_DATA) { 1372 | FPART record = {}; 1373 | 1374 | record.filePart = FILEPART_DATA; 1375 | record.nFileOffset = 0; 1376 | record.nFileSize = nMaxOffset; 1377 | record.nVirtualAddress = -1; 1378 | record.sName = tr("Data"); 1379 | 1380 | listResult.append(record); 1381 | } 1382 | 1383 | if ((nFileParts & FILEPART_STREAM) || (nFileParts & FILEPART_OBJECT)) { 1384 | qint32 nNumberOfObjects = listObject.count(); 1385 | qint32 nStreamNumber = 0; 1386 | 1387 | for (qint32 i = 0; (i < nNumberOfObjects) && XBinary::isPdStructNotCanceled(pPdStruct); ++i) { 1388 | const OBJECT &object = listObject.at(i); 1389 | 1390 | if (nFileParts & FILEPART_OBJECT) { 1391 | FPART record = {}; 1392 | 1393 | record.filePart = FILEPART_OBJECT; 1394 | record.nFileOffset = object.nOffset; 1395 | record.nFileSize = object.nSize; 1396 | record.nVirtualAddress = -1; 1397 | record.sName = QString("%1 %2").arg(tr("Object"), QString::number(object.nID)); 1398 | 1399 | listResult.append(record); 1400 | } 1401 | 1402 | if (nFileParts & FILEPART_STREAM) { 1403 | XPART xpart = handleXpart(object.nOffset, object.nID, -1, pPdStruct); 1404 | 1405 | qint32 nNumberOfStreams = xpart.listStreams.count(); 1406 | 1407 | for (qint32 j = 0; (j < nNumberOfStreams) && XBinary::isPdStructNotCanceled(pPdStruct); ++j) { 1408 | const STREAM &stream = xpart.listStreams.at(j); 1409 | 1410 | XBinary::FPART record = {}; 1411 | record.nFileOffset = stream.nOffset; 1412 | record.nFileSize = stream.nSize; 1413 | record.sName = QString("%1 obj (%2)").arg(tr("Stream"), QString::number(object.nID)); 1414 | record.filePart = XBinary::FILEPART_STREAM; 1415 | record.nVirtualAddress = -1; 1416 | 1417 | const QString sFilter = getFirstStringValueByKey(&(xpart.listParts), QLatin1String("/Filter"), pPdStruct).var.toString(); 1418 | 1419 | if (sFilter == QLatin1String("/FlateDecode")) { 1420 | // ZLIB 1421 | record.mapProperties.insert(FPART_PROP_COMPRESSMETHOD, COMPRESS_METHOD_ZLIB); 1422 | } else if (sFilter == QLatin1String("/LZWDecode")) { 1423 | record.mapProperties.insert(FPART_PROP_COMPRESSMETHOD, COMPRESS_METHOD_LZW_PDF); 1424 | // record.mapProperties.insert(FPART_PROP_COMPRESSMETHOD, COMPRESS_METHOD_STORE); 1425 | } else if (sFilter == QLatin1String("/ASCII85Decode")) { 1426 | record.mapProperties.insert(FPART_PROP_COMPRESSMETHOD, COMPRESS_METHOD_ASCII85); 1427 | } else if (sFilter == QLatin1String("/DCTDecode")) { 1428 | // JPEG 1429 | record.mapProperties.insert(FPART_PROP_COMPRESSMETHOD, COMPRESS_METHOD_STORE); 1430 | } else if (sFilter == QLatin1String("/CCITTFaxDecode")) { 1431 | // JPEG 1432 | record.mapProperties.insert(FPART_PROP_COMPRESSMETHOD, COMPRESS_METHOD_STORE); 1433 | } else if (sFilter == QLatin1String("[")) { 1434 | #ifdef QT_DEBUG 1435 | qDebug() << "Unknown filter:" << sFilter << xpart.listParts << record.sName; 1436 | #endif 1437 | } else { 1438 | #ifdef QT_DEBUG 1439 | qDebug() << "Unknown filter:" << sFilter << xpart.listParts << record.sName; 1440 | #endif 1441 | // TODO 1442 | record.mapProperties.insert(FPART_PROP_COMPRESSMETHOD, COMPRESS_METHOD_STORE); 1443 | } 1444 | 1445 | if (!sFilter.isEmpty()) { 1446 | if (getFirstStringValueByKey(&(xpart.listParts), QLatin1String("/Subtype"), pPdStruct).var.toString() == QLatin1String("/Image")) { 1447 | const qint32 nWidth = getFirstStringValueByKey(&(xpart.listParts), QLatin1String("/Width"), pPdStruct).var.toInt(); 1448 | const qint32 nHeight = getFirstStringValueByKey(&(xpart.listParts), QLatin1String("/Height"), pPdStruct).var.toInt(); 1449 | const qint32 nBitsPerComponent = getFirstStringValueByKey(&(xpart.listParts), QLatin1String("/BitsPerComponent"), pPdStruct).var.toInt(); 1450 | 1451 | record.mapProperties.insert(FPART_PROP_FILETYPE, XBinary::FT_PNG); 1452 | record.mapProperties.insert(FPART_PROP_HANDLEMETHOD, XBinary::HANDLE_METHOD_PDF_IMAGEDATA); 1453 | record.mapProperties.insert(FPART_PROP_WIDTH, nWidth); 1454 | record.mapProperties.insert(FPART_PROP_HEIGHT, nHeight); 1455 | record.mapProperties.insert(FPART_PROP_BITSPERCOMPONENT, nBitsPerComponent); 1456 | record.mapProperties.insert(FPART_PROP_EXT, QStringLiteral("png")); 1457 | record.mapProperties.insert( 1458 | FPART_PROP_INFO, QString("%1 (%2 x %3) [%4]") 1459 | .arg(tr("Raw image data"), QString::number(nWidth), QString::number(nHeight), QString::number(nBitsPerComponent))); 1460 | } 1461 | } 1462 | 1463 | // qDebug() << "Filter:" << sFilter << xpart.listParts << record.sName; 1464 | 1465 | // if (stream.nSize >= 6) { 1466 | // quint16 nHeader = read_uint16(record.nFileOffset); 1467 | // if ((nHeader == 0x5E78) || (nHeader == 0x9C78) || (nHeader == 0xDA78)) { 1468 | 1469 | // if (getFirstStringValueByKey(&(xpart.listParts), "/Subtype", pPdStruct).var.toString() == "/Image") { 1470 | // qDebug() << xpart.listParts << record.sName; 1471 | // } 1472 | // compMethod = COMPRESS_METHOD_ZLIB; 1473 | // } 1474 | // } 1475 | 1476 | listResult.append(record); 1477 | 1478 | nStreamNumber++; 1479 | } 1480 | } 1481 | } 1482 | } 1483 | 1484 | if (nFileParts & FILEPART_OVERLAY) { 1485 | if (nMaxOffset < totalSize) { 1486 | FPART record = {}; 1487 | 1488 | record.filePart = FILEPART_OVERLAY; 1489 | record.nFileOffset = nMaxOffset; 1490 | record.nFileSize = totalSize - nMaxOffset; 1491 | record.nVirtualAddress = -1; 1492 | record.sName = tr("Overlay"); 1493 | 1494 | listResult.append(record); 1495 | } 1496 | } 1497 | 1498 | return listResult; 1499 | } 1500 | 1501 | bool XPDF::initUnpack(UNPACK_STATE *pState, const QMap &mapProperties, PDSTRUCT *pPdStruct) 1502 | { 1503 | Q_UNUSED(mapProperties) 1504 | 1505 | if (!pState) { 1506 | return false; 1507 | } 1508 | 1509 | // Initialize state 1510 | pState->nCurrentOffset = 0; 1511 | pState->nTotalSize = getSize(); 1512 | pState->nCurrentIndex = 0; 1513 | pState->mapProperties = mapProperties; 1514 | pState->pContext = nullptr; 1515 | 1516 | // Get all streams from the PDF 1517 | QList listStreams = getFileParts(FILEPART_STREAM, -1, pPdStruct); 1518 | 1519 | if (XBinary::isPdStructNotCanceled(pPdStruct)) { 1520 | // Create context 1521 | UNPACK_CONTEXT *pContext = new UNPACK_CONTEXT; 1522 | pContext->listStreams = listStreams; 1523 | pContext->nCurrentStreamIndex = 0; 1524 | 1525 | pState->pContext = pContext; 1526 | pState->nNumberOfRecords = listStreams.count(); 1527 | 1528 | return true; 1529 | } 1530 | 1531 | return false; 1532 | } 1533 | 1534 | XBinary::ARCHIVERECORD XPDF::infoCurrent(UNPACK_STATE *pState, PDSTRUCT *pPdStruct) 1535 | { 1536 | Q_UNUSED(pPdStruct) 1537 | 1538 | XBinary::ARCHIVERECORD result = {}; 1539 | 1540 | if (!pState || !pState->pContext) { 1541 | return result; 1542 | } 1543 | 1544 | UNPACK_CONTEXT *pContext = (UNPACK_CONTEXT *)pState->pContext; 1545 | 1546 | if (pContext->nCurrentStreamIndex < 0 || pContext->nCurrentStreamIndex >= pContext->listStreams.count()) { 1547 | return result; 1548 | } 1549 | 1550 | const XBinary::FPART &stream = pContext->listStreams.at(pContext->nCurrentStreamIndex); 1551 | 1552 | // Fill archive record 1553 | result.nStreamOffset = stream.nFileOffset; 1554 | result.nStreamSize = stream.nFileSize; 1555 | 1556 | // Generate filename from stream name 1557 | QString sFileName = stream.sName; 1558 | // Replace invalid filename characters 1559 | sFileName = sFileName.replace(QLatin1Char('/'), QLatin1Char('_')); 1560 | sFileName = sFileName.replace(QLatin1Char('\\'), QLatin1Char('_')); 1561 | sFileName = sFileName.replace(QLatin1Char(':'), QLatin1Char('_')); 1562 | sFileName = sFileName.replace(QLatin1Char('*'), QLatin1Char('_')); 1563 | sFileName = sFileName.replace(QLatin1Char('?'), QLatin1Char('_')); 1564 | sFileName = sFileName.replace(QLatin1Char('"'), QLatin1Char('_')); 1565 | sFileName = sFileName.replace(QLatin1Char('<'), QLatin1Char('_')); 1566 | sFileName = sFileName.replace(QLatin1Char('>'), QLatin1Char('_')); 1567 | sFileName = sFileName.replace(QLatin1Char('|'), QLatin1Char('_')); 1568 | 1569 | // Add extension based on file type or compression 1570 | QString sExt = stream.mapProperties.value(FPART_PROP_EXT).toString(); 1571 | if (sExt.isEmpty()) { 1572 | // Default extension based on compression method 1573 | COMPRESS_METHOD compMethod = (COMPRESS_METHOD)stream.mapProperties.value(FPART_PROP_COMPRESSMETHOD, COMPRESS_METHOD_STORE).toInt(); 1574 | if (compMethod == COMPRESS_METHOD_STORE) { 1575 | sExt = QStringLiteral("bin"); 1576 | } else { 1577 | sExt = QStringLiteral("dat"); 1578 | } 1579 | } 1580 | 1581 | if (!sExt.isEmpty()) { 1582 | sFileName = QString("%1_%2.%3").arg(sFileName, QString::number(pContext->nCurrentStreamIndex), sExt); 1583 | } else { 1584 | sFileName = QString("%1_%2").arg(sFileName, QString::number(pContext->nCurrentStreamIndex)); 1585 | } 1586 | 1587 | result.mapProperties.insert(FPART_PROP_ORIGINALNAME, sFileName); 1588 | result.mapProperties.insert(FPART_PROP_UNCOMPRESSEDSIZE, stream.nFileSize); 1589 | result.mapProperties.insert(FPART_PROP_COMPRESSEDSIZE, stream.nFileSize); 1590 | 1591 | // Copy relevant properties from FPART 1592 | QMapIterator it(stream.mapProperties); 1593 | while (it.hasNext()) { 1594 | it.next(); 1595 | result.mapProperties.insert(it.key(), it.value()); 1596 | } 1597 | 1598 | return result; 1599 | } 1600 | 1601 | bool XPDF::unpackCurrent(UNPACK_STATE *pState, QIODevice *pDevice, PDSTRUCT *pPdStruct) 1602 | { 1603 | if (!pState || !pState->pContext || !pDevice) { 1604 | return false; 1605 | } 1606 | 1607 | UNPACK_CONTEXT *pContext = (UNPACK_CONTEXT *)pState->pContext; 1608 | 1609 | if (pContext->nCurrentStreamIndex < 0 || pContext->nCurrentStreamIndex >= pContext->listStreams.count()) { 1610 | return false; 1611 | } 1612 | 1613 | const XBinary::FPART &stream = pContext->listStreams.at(pContext->nCurrentStreamIndex); 1614 | 1615 | // Read stream data 1616 | QByteArray baData = read_array_process(stream.nFileOffset, stream.nFileSize, pPdStruct); 1617 | 1618 | if (XBinary::isPdStructNotCanceled(pPdStruct)) { 1619 | // Check if decompression is needed 1620 | COMPRESS_METHOD compMethod = (COMPRESS_METHOD)stream.mapProperties.value(FPART_PROP_COMPRESSMETHOD, COMPRESS_METHOD_STORE).toInt(); 1621 | 1622 | if (compMethod != COMPRESS_METHOD_STORE) { 1623 | // Decompress the data 1624 | QBuffer sourceBuffer(&baData); 1625 | sourceBuffer.open(QIODevice::ReadOnly); 1626 | 1627 | QBuffer destBuffer; 1628 | destBuffer.open(QIODevice::WriteOnly); 1629 | 1630 | XArchive::DECOMPRESSSTRUCT decompressStruct = {}; 1631 | decompressStruct.spInfo.compressMethod = compMethod; 1632 | decompressStruct.pSourceDevice = &sourceBuffer; 1633 | decompressStruct.pDestDevice = &destBuffer; 1634 | decompressStruct.nInSize = baData.size(); 1635 | decompressStruct.nOutSize = -1; 1636 | decompressStruct.nDecompressedOffset = 0; 1637 | decompressStruct.nDecompressedLimit = -1; 1638 | decompressStruct.bLimit = false; 1639 | 1640 | XArchive::COMPRESS_RESULT compressResult = XArchive::_decompress(&decompressStruct, pPdStruct); 1641 | 1642 | sourceBuffer.close(); 1643 | destBuffer.close(); 1644 | 1645 | if (compressResult == XArchive::COMPRESS_RESULT_OK) { 1646 | baData = destBuffer.data(); 1647 | } else { 1648 | // Decompression failed, use original data 1649 | #ifdef QT_DEBUG 1650 | qDebug() << "XPDF::unpackCurrent: Decompression failed, using original data"; 1651 | #endif 1652 | } 1653 | } 1654 | 1655 | // Write data to output device 1656 | qint64 nWritten = pDevice->write(baData); 1657 | return (nWritten == baData.size()); 1658 | } 1659 | 1660 | return false; 1661 | } 1662 | 1663 | bool XPDF::moveToNext(UNPACK_STATE *pState, PDSTRUCT *pPdStruct) 1664 | { 1665 | Q_UNUSED(pPdStruct) 1666 | 1667 | if (!pState || !pState->pContext) { 1668 | return false; 1669 | } 1670 | 1671 | UNPACK_CONTEXT *pContext = (UNPACK_CONTEXT *)pState->pContext; 1672 | 1673 | pContext->nCurrentStreamIndex++; 1674 | pState->nCurrentIndex = pContext->nCurrentStreamIndex; 1675 | 1676 | return true; 1677 | } 1678 | 1679 | bool XPDF::finishUnpack(UNPACK_STATE *pState, PDSTRUCT *pPdStruct) 1680 | { 1681 | Q_UNUSED(pPdStruct) 1682 | 1683 | if (!pState) { 1684 | return false; 1685 | } 1686 | 1687 | if (pState->pContext) { 1688 | UNPACK_CONTEXT *pContext = (UNPACK_CONTEXT *)pState->pContext; 1689 | delete pContext; 1690 | pState->pContext = nullptr; 1691 | } 1692 | 1693 | pState->nCurrentIndex = 0; 1694 | pState->nNumberOfRecords = 0; 1695 | pState->nCurrentOffset = 0; 1696 | 1697 | return true; 1698 | } 1699 | --------------------------------------------------------------------------------