uPDFParser

uPDFParser Commit Details

Date: 2021-09-09 20:46:46 (1 month 8 days ago)
Author: Grégory Soutadé
Branch: master
Commit: f2432712159b22b57f82c5450ad79421c4c4cd9c
Parents: f299c6965408bd8942e1f3acc38f0b7eb00bf24f
Message: Improve performances for stream parsing

Changes:
M include/uPDFParser.h (1 diff)
M src/uPDFParser.cpp (2 diffs)

File differences

include/uPDFParser.h
93 93
94 94
95 95
96
96
97 97
98 98
99 99
Array* parseArray(Object* object);
String* parseString();
HexaString* parseHexaString();
Stream* parseStream();
Stream* parseStream(Object* object);
Name* parseName(std::string& token);
void writeUpdate(const std::string& filename);
src/uPDFParser.cpp
316 316
317 317
318 318
319
319
320 320
321 321
322 322
......
401 401
402 402
403 403
404
404
405 405
406
407
406
407
408
409
410
411
408 412
409
413
414
415
416
417
410 418
411
412
413
414
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
415 436
437
438
439
440
441
442
443
444
445
446
416 447
417
448
418 449
419 450
420 451
else if (token == "<")
value = parseHexaString();
else if (token == "stream")
value = parseStream();
value = parseStream(object);
else if (token[0] >= '1' && token[0] <= '9')
value = parseNumberOrReference(token);
else if (token[0] == '/')
return new HexaString(res);
}
Stream* Parser::parseStream()
Stream* Parser::parseStream(Object* object)
{
char buffer[1024];
off_t endOffset;
off_t startOffset, endOffset;
std::string token;
// std::cout << "parseStream" << std::endl;
startOffset = lseek(fd, 0, SEEK_CUR);
while (1)
if (!object->hasKey("Length"))
EXCEPTION(INVALID_STREAM, "No Length property at offset " << curOffset);
DataType* Length = (*object)["Length"];
if (Length->type() != DataType::INTEGER)
{
endOffset = lseek(fd, 0, SEEK_CUR);
readline(fd, buffer, sizeof(buffer));
if (!strncmp(buffer, "endstream", 9))
break;
if (Length->type() != DataType::REFERENCE)
EXCEPTION(INVALID_STREAM, "Invalid Length property at offset " << curOffset);
// Don't want to parse xref table...
while (1)
{
char buffer[4*1024];
int ret;
endOffset = lseek(fd, 0, SEEK_CUR);
ret = readline(fd, buffer, sizeof(buffer));
if (!strncmp(buffer, "endstream", 9))
{
lseek(fd, -(ret-9), SEEK_CUR);
break;
}
}
return new Stream(startOffset, endOffset);
}
Integer* length = (Integer*)Length;
endOffset = startOffset + length->value();
lseek(fd, endOffset, SEEK_SET);
token = nextToken();
if (token != "endstream")
EXCEPTION(INVALID_STREAM, "endstream not found at offset " << endOffset);
// std::cout << "end parseStream" << std::endl;
return new Stream(curOffset, endOffset);
return new Stream(startOffset, endOffset);
}
Name* Parser::parseName(std::string& name)

Archive Download the corresponding diff file

Branches