Fix \r\n handling in stream read and write functions

This commit is contained in:
Grégory Soutadé 2023-01-07 15:43:39 +01:00
parent 68371b9b17
commit fee7ec9cc3
3 changed files with 29 additions and 16 deletions

View File

@@ -104,6 +104,7 @@ namespace uPDFParser
bool parseXref(); bool parseXref();
bool parseTrailer(); bool parseTrailer();
char prevChar();
std::string nextToken(bool exceptionOnEOF=true, bool readComment=false); std::string nextToken(bool exceptionOnEOF=true, bool readComment=false);
DataType* parseType(std::string& token, Object* object, std::map<std::string, DataType*>& dict); DataType* parseType(std::string& token, Object* object, std::map<std::string, DataType*>& dict);
@@ -121,6 +122,7 @@ namespace uPDFParser
void writeBuffer(int fd, const char* buffer, int size); void writeBuffer(int fd, const char* buffer, int size);
void writeUpdate(const std::string& filename); void writeUpdate(const std::string& filename);
char c;
int version_major, version_minor; int version_major, version_minor;
std::vector<Object*> _objects; std::vector<Object*> _objects;
Object trailer, *xrefObject; Object trailer, *xrefObject;

View File

@@ -157,19 +157,22 @@ namespace uPDFParser
} }
} }
/**
 * @brief Return the current value of the parser's `c` character
 *
 * A caller in this file compares the result against '\r' to decide
 * whether a following '\n' must be consumed before stream data begins.
 * NOTE(review): presumably `c` holds the last character read by
 * nextToken() -- confirm against the full implementation.
 */
char Parser::prevChar() { return c; }
/** /**
* @brief Find next token to analyze * @brief Find next token to analyze
*/ */
std::string Parser::nextToken(bool exceptionOnEOF, bool readComment) std::string Parser::nextToken(bool exceptionOnEOF, bool readComment)
{ {
char c = 0, prev_c; char prev_c;
std::string res(""); std::string res("");
int i; int i;
static const char delims[] = " \t<>[]()/"; static const char delims[] = " \t<>[]()/";
static const char whitespace_prev_delims[] = "+-"; // Need whitespace before static const char whitespace_prev_delims[] = "+-"; // Need whitespace before
static const char start_delims[] = "<>[]()"; static const char start_delims[] = "<>[]()";
bool found = false; bool found = false;
c = 0;
while (!found) while (!found)
{ {
prev_c = c; prev_c = c;
@@ -574,9 +577,19 @@ namespace uPDFParser
{ {
off_t startOffset, endOffset, endStream; off_t startOffset, endOffset, endStream;
std::string token; std::string token;
char c = 0;
// std::cout << "parseStream" << std::endl; // std::cout << "parseStream" << std::endl;
// Remove \n after \r if there is one
if (prevChar() == '\r' && read(fd, &c, 1) == 1)
{
if (c != '\n')
{
lseek(fd, -1, SEEK_CUR);
}
}
startOffset = lseek(fd, 0, SEEK_CUR); startOffset = lseek(fd, 0, SEEK_CUR);
if (!object->hasKey("Length")) if (!object->hasKey("Length"))
@@ -622,9 +635,16 @@ namespace uPDFParser
ret = read(fd, &c, 1); ret = read(fd, &c, 1);
if (ret <= 0) if (ret <= 0)
break; break;
if (c != '\n' && c != '\r') if (c == '\r')
break; {
lseek(fd, -1, SEEK_CUR); lseek(fd, -1, SEEK_CUR);
continue;
}
else if (c == '\n')
{
lseek(fd, -1, SEEK_CUR);
}
break;
} }
// Adjust final position // Adjust final position
lseek(fd, endStream, SEEK_SET); lseek(fd, endStream, SEEK_SET);

View File

@@ -105,19 +105,10 @@ namespace uPDFParser
std::string Stream::str() std::string Stream::str()
{ {
std::string res = "stream"; std::string res = "stream\n";
const char* streamData = (const char*)data(); // Force reading if not in memory const char* streamData = (const char*)data(); // Force reading if not in memory
if (_dataLength &&
streamData[0] != '\n' &&
streamData[0] != '\r')
res += "\n";
res += std::string(streamData, _dataLength); res += std::string(streamData, _dataLength);
// Be sure there is a final line return res += "\nendstream\n";
if (_dataLength &&
streamData[_dataLength-1] != '\n' &&
streamData[_dataLength-1] != '\r')
res += "\n";
res += "endstream\n";
return res; return res;
} }