Improve performance of stream parsing

This commit is contained in:
Grégory Soutadé 2021-09-09 20:46:46 +02:00
parent f299c69654
commit f243271215
2 changed files with 43 additions and 12 deletions

View File

@ -93,7 +93,7 @@ namespace uPDFParser
Array* parseArray(Object* object); Array* parseArray(Object* object);
String* parseString(); String* parseString();
HexaString* parseHexaString(); HexaString* parseHexaString();
Stream* parseStream(); Stream* parseStream(Object* object);
Name* parseName(std::string& token); Name* parseName(std::string& token);
void writeUpdate(const std::string& filename); void writeUpdate(const std::string& filename);

View File

@ -316,7 +316,7 @@ namespace uPDFParser
else if (token == "<") else if (token == "<")
value = parseHexaString(); value = parseHexaString();
else if (token == "stream") else if (token == "stream")
value = parseStream(); value = parseStream(object);
else if (token[0] >= '1' && token[0] <= '9') else if (token[0] >= '1' && token[0] <= '9')
value = parseNumberOrReference(token); value = parseNumberOrReference(token);
else if (token[0] == '/') else if (token[0] == '/')
@ -401,20 +401,51 @@ namespace uPDFParser
return new HexaString(res); return new HexaString(res);
} }
Stream* Parser::parseStream() Stream* Parser::parseStream(Object* object)
{ {
char buffer[1024]; off_t startOffset, endOffset;
off_t endOffset; std::string token;
// std::cout << "parseStream" << std::endl;
startOffset = lseek(fd, 0, SEEK_CUR);
while (1) if (!object->hasKey("Length"))
EXCEPTION(INVALID_STREAM, "No Length property at offset " << curOffset);
DataType* Length = (*object)["Length"];
if (Length->type() != DataType::INTEGER)
{ {
endOffset = lseek(fd, 0, SEEK_CUR); if (Length->type() != DataType::REFERENCE)
readline(fd, buffer, sizeof(buffer)); EXCEPTION(INVALID_STREAM, "Invalid Length property at offset " << curOffset);
if (!strncmp(buffer, "endstream", 9))
break;
}
return new Stream(curOffset, endOffset); // Don't want to parse xref table...
while (1)
{
char buffer[4*1024];
int ret;
endOffset = lseek(fd, 0, SEEK_CUR);
ret = readline(fd, buffer, sizeof(buffer));
if (!strncmp(buffer, "endstream", 9))
{
lseek(fd, -(ret-9), SEEK_CUR);
break;
}
}
return new Stream(startOffset, endOffset);
}
Integer* length = (Integer*)Length;
endOffset = startOffset + length->value();
lseek(fd, endOffset, SEEK_SET);
token = nextToken();
if (token != "endstream")
EXCEPTION(INVALID_STREAM, "endstream not found at offset " << endOffset);
// std::cout << "end parseStream" << std::endl;
return new Stream(startOffset, endOffset);
} }
Name* Parser::parseName(std::string& name) Name* Parser::parseName(std::string& name)