Commit 17fe34ec, authored by Caolán McNamara

here calc, take ownership of this foul monstrosity only you use

SvStream::ReadCsvLine doesn't need to be a member of Stream and the subcomment
about what's wrong with the method is longer than the body of the method.

Only used by calc, so can go into calc.

foul monstrosity back
üst e5ac3524
......@@ -476,7 +476,7 @@ bool ScImportAsciiDlg::GetLine( sal_uLong nLine, String &rText )
bRet = false;
break;
}
mpDatStream->ReadCsvLine( rText, !bFixed, maFieldSeparators,
ReadCsvLine(*mpDatStream, rText, !bFixed, maFieldSeparators,
mcTextSep);
mnStreamPos = mpDatStream->Tell();
mpRowPosArray[++mnRowPosCount] = mnStreamPos;
......@@ -494,7 +494,7 @@ bool ScImportAsciiDlg::GetLine( sal_uLong nLine, String &rText )
else
{
Seek( mpRowPosArray[nLine]);
mpDatStream->ReadCsvLine( rText, !bFixed, maFieldSeparators, mcTextSep);
ReadCsvLine(*mpDatStream, rText, !bFixed, maFieldSeparators, mcTextSep);
mnStreamPos = mpDatStream->Tell();
}
......
......@@ -1180,7 +1180,7 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm )
while(--nSkipLines>0)
{
rStrm.ReadCsvLine( aLine, !bFixed, rSeps, cStr); // content is ignored
ReadCsvLine(rStrm, aLine, !bFixed, rSeps, cStr); // content is ignored
if ( rStrm.IsEof() )
break;
}
......@@ -1203,7 +1203,7 @@ sal_Bool ScImportExport::ExtText2Doc( SvStream& rStrm )
{
for( ;; )
{
rStrm.ReadCsvLine( aLine, !bFixed, rSeps, cStr);
ReadCsvLine(rStrm, aLine, !bFixed, rSeps, cStr);
if ( rStrm.IsEof() )
break;
......@@ -2109,4 +2109,85 @@ ScFormatFilterPlugin &ScFormatFilter::Get()
return *plugin;
}
// Find the first element equal to c in the 0-terminated array pStr.
// Precondition: pStr is non-NULL and 0-terminated.
// Returns a pointer to the first matching element, or 0 if the terminator
// is reached without a match (the terminator itself is never matched).
inline const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
                                             sal_Unicode c )
{
    for (const sal_Unicode* pCur = pStr; *pCur; ++pCur)
    {
        if (*pCur == c)
            return pCur;
    }
    return 0;
}
// Read one logical CSV line from rStream into rStr.
//
// The first physical line is always fetched with ReadUniOrByteStringLine().
// Only if bEmbeddedLineBreak is set is the text scanned for quoted fields:
// as long as the number of counted quote characters is odd (i.e. a quoted
// field is still open at the end of the text read so far), a line feed and
// the next physical line are appended to rStr and scanning continues.
//
// rFieldSeparators       characters that each act as a field separator
// cFieldQuote            the quote character
// bAllowBackslashEscape  if set, a quote preceded by an unescaped backslash
//                        inside a quoted field is not counted
void ReadCsvLine(SvStream &rStream, String& rStr, sal_Bool bEmbeddedLineBreak,
const String& rFieldSeparators, sal_Unicode cFieldQuote,
sal_Bool bAllowBackslashEscape)
{
rStream.ReadUniOrByteStringLine(rStr, rStream.GetStreamCharSet());
if (bEmbeddedLineBreak)
{
const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
// See if the separator(s) include tab.
bool bTabSep = lcl_UnicodeStrChr(pSeps, '\t') != NULL;
// Offset into rStr at which the next scan pass starts, so text that was
// already examined is not scanned (and its quotes not counted) again.
xub_StrLen nLastOffset = 0;
// 0: no opening quote seen yet. Otherwise the number of quote characters
// counted since the first opening quote; an odd value means a quoted
// field is currently open.
xub_StrLen nQuotes = 0;
// Stop at end of stream or once rStr has reached STRING_MAXLEN.
while (!rStream.IsEof() && rStr.Len() < STRING_MAXLEN)
{
// Escape state is restarted for every scan pass.
bool bBackslashEscaped = false;
const sal_Unicode *p, *pStart;
// Fetch the buffer anew on every pass; rStr is appended to at the end of
// the previous pass.
p = pStart = rStr.GetBuffer();
p += nLastOffset;
while (*p)
{
if (nQuotes)
{
if (bTabSep && *p == '\t' && (nQuotes % 2) != 0)
{
// When tab-delimited, tab char ends quoted sequence
// even if we haven't reached the end quote. Doing
// this helps keep mal-formed rows from damaging
// other, well-formed rows.
nQuotes = 0;
break;
}
// Once an opening quote was seen, every unescaped quote character is
// counted, wherever it occurs. A doubled quote adds two and therefore
// leaves the parity unchanged.
if (*p == cFieldQuote && !bBackslashEscaped)
++nQuotes;
else if (bAllowBackslashEscape)
{
// A backslash toggles the escape state (so "\\" cancels itself out);
// any other character clears it.
if (*p == '\\')
bBackslashEscaped = !bBackslashEscaped;
else
bBackslashEscaped = false;
}
}
// No opening quote so far: a quote only counts as opening if it is the
// first character of the string or directly follows a separator. The
// p == pStart test comes first so that p[-1] is never read before the
// start of the buffer.
else if (*p == cFieldQuote && (p == pStart ||
lcl_UnicodeStrChr( pSeps, p[-1])))
nQuotes = 1;
// A quote character inside a field content does not start
// a quote.
++p;
}
// Even count (including 0): no quoted field is open, the line is complete.
if (nQuotes % 2 == 0)
break;
else
{
// A quoted field is still open: remember where the already scanned text
// ends, then append a line feed plus the next physical line.
nLastOffset = rStr.Len();
String aNext;
rStream.ReadUniOrByteStringLine(aNext, rStream.GetStreamCharSet());
rStr += sal_Unicode(_LF);
rStr += aNext;
}
}
}
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
......@@ -189,6 +189,59 @@ public:
}
};
/** Read a CSV (comma separated values) data line using
ReadUniOrByteStringLine().
@param rStream
The stream the line is read from.
@param rStr
Receives the line that was read, including any appended
continuation lines.
@param bEmbeddedLineBreak
If sal_True and a line-break occurs inside a field of data,
a line feed LF '\n' and the next line are appended. Repeats
until a line-break is not in a field. A field is determined
by delimiting rFieldSeparators and optionally surrounded by
a pair of cFieldQuote characters. For a line-break to be
within a field, the field content MUST be surrounded by
cFieldQuote characters, and the opening cFieldQuote MUST be
at the very start of a line or follow right behind a field
separator with no extra characters in between. Anything,
including field separators and escaped quotes (by doubling
them, or preceding them with a backslash if
bAllowBackslashEscape==sal_True) may appear in a quoted
field.
If bEmbeddedLineBreak==sal_False, nothing is parsed and the
string returned is simply one ReadUniOrByteStringLine().
@param rFieldSeparators
A list of characters that each may act as a field separator.
@param cFieldQuote
The quote character used.
@param bAllowBackslashEscape
If sal_True, an embedded quote character inside a quoted
field may also be escaped with a preceding backslash.
Normally, quotes are escaped by doubling them.
Defaults to sal_False.
@note
Check Stream::good() to detect IO problems during read.
@ATTENTION
Note that the string returned may be truncated even inside
a quoted field if STRING_MAXLEN was reached. There
currently is no way to exactly determine the conditions,
whether this was at a line end, or whether open quotes
would have closed the field before the line end, as even a
ReadUniOrByteStringLine() may return prematurely but the
stream was positioned ahead until the real end of line.
Additionally, due to character encoding conversions, string
length and bytes read don't necessarily match, and
resyncing to a previous position matching the string's
length isn't always possible. As a result, a logical line
with embedded line breaks and more than STRING_MAXLEN
characters will be spoiled, and a subsequent ReadCsvLine()
may start under false preconditions.
*/
SC_DLLPUBLIC void ReadCsvLine(SvStream &rStream, String& rStr, sal_Bool bEmbeddedLineBreak,
const String& rFieldSeparators, sal_Unicode cFieldQuote,
sal_Bool bAllowBackslashEscape = sal_False);
#endif
......
......@@ -432,60 +432,6 @@ public:
/** Convenience overload: forwards ch to the two-argument
WriteUniOrByteChar() together with the value returned by
GetStreamCharSet(), and returns that call's result. */
sal_Bool WriteUniOrByteChar( sal_Unicode ch )
{ return WriteUniOrByteChar( ch, GetStreamCharSet() ); }
/** Read a CSV (comma separated values) data line using
ReadUniOrByteStringLine().
@param rStr
Receives the line that was read, including any appended
continuation lines.
@param bEmbeddedLineBreak
If sal_True and a line-break occurs inside a field of data,
a line feed LF '\n' and the next line are appended. Repeats
until a line-break is not in a field. A field is determined
by delimiting rFieldSeparators and optionally surrounded by
a pair of cFieldQuote characters. For a line-break to be
within a field, the field content MUST be surrounded by
cFieldQuote characters, and the opening cFieldQuote MUST be
at the very start of a line or follow right behind a field
separator with no extra characters in between. Anything,
including field separators and escaped quotes (by doubling
them, or preceding them with a backslash if
bAllowBackslashEscape==sal_True) may appear in a quoted
field.
If bEmbeddedLineBreak==sal_False, nothing is parsed and the
string returned is simply one ReadUniOrByteStringLine().
@param rFieldSeparators
A list of characters that each may act as a field separator.
@param cFieldQuote
The quote character used.
@param bAllowBackslashEscape
If sal_True, an embedded quote character inside a quoted
field may also be escaped with a preceding backslash.
Normally, quotes are escaped by doubling them.
Defaults to sal_False.
@note
Check Stream::good() to detect IO problems during read.
@ATTENTION
Note that the string returned may be truncated even inside
a quoted field if STRING_MAXLEN was reached. There
currently is no way to exactly determine the conditions,
whether this was at a line end, or whether open quotes
would have closed the field before the line end, as even a
ReadUniOrByteStringLine() may return prematurely but the
stream was positioned ahead until the real end of line.
Additionally, due to character encoding conversions, string
length and bytes read don't necessarily match, and
resyncing to a previous position matching the string's
length isn't always possible. As a result, a logical line
with embedded line breaks and more than STRING_MAXLEN
characters will be spoiled, and a subsequent ReadCsvLine()
may start under false preconditions.
*/
void ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
const String& rFieldSeparators, sal_Unicode cFieldQuote,
sal_Bool bAllowBackslashEscape = sal_False);
// Declared here only; the definition is not in this excerpt.
// NOTE(review): presumably changes the stream's buffer size to nBufSize - confirm.
void SetBufferSize( sal_uInt16 nBufSize );
// Returns the current value of the nBufSize member.
sal_uInt16 GetBufferSize() const { return nBufSize; }
......
......@@ -1017,93 +1017,6 @@ sal_Bool SvStream::StartReadingUnicodeText( rtl_TextEncoding eReadBomCharSet )
return nError == SVSTREAM_OK;
}
/*************************************************************************
|*
|* Stream::ReadCsvLine()
|*
*************************************************************************/
// Search the 0-terminated array pStr for the character c.
// Precondition: pStr must not be NULL and must be 0-terminated.
// Yields a pointer to the first element equal to c; yields 0 when the
// terminator is hit first (so searching for 0 itself never succeeds).
inline const sal_Unicode* lcl_UnicodeStrChr( const sal_Unicode* pStr,
                                             sal_Unicode c )
{
    for ( ; *pStr; ++pStr)
    {
        if (*pStr == c)
            return pStr;
    }
    return 0;
}
// Read one logical CSV line from this stream into rStr.
//
// The first physical line is always fetched with ReadUniOrByteStringLine().
// Only if bEmbeddedLineBreak is set is the text scanned for quoted fields:
// as long as the number of counted quote characters is odd (i.e. a quoted
// field is still open at the end of the text read so far), a line feed and
// the next physical line are appended to rStr and scanning continues.
//
// rFieldSeparators       characters that each act as a field separator
// cFieldQuote            the quote character
// bAllowBackslashEscape  if set, a quote preceded by an unescaped backslash
//                        inside a quoted field is not counted
void SvStream::ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
const String& rFieldSeparators, sal_Unicode cFieldQuote,
sal_Bool bAllowBackslashEscape)
{
ReadUniOrByteStringLine(rStr, GetStreamCharSet());
if (bEmbeddedLineBreak)
{
const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
// See if the separator(s) include tab.
bool bTabSep = lcl_UnicodeStrChr(pSeps, '\t') != NULL;
// Offset into rStr at which the next scan pass starts, so text that was
// already examined is not scanned (and its quotes not counted) again.
xub_StrLen nLastOffset = 0;
// 0: no opening quote seen yet. Otherwise the number of quote characters
// counted since the first opening quote; an odd value means a quoted
// field is currently open.
xub_StrLen nQuotes = 0;
// Stop at end of stream or once rStr has reached STRING_MAXLEN.
while (!IsEof() && rStr.Len() < STRING_MAXLEN)
{
// Escape state is restarted for every scan pass.
bool bBackslashEscaped = false;
const sal_Unicode *p, *pStart;
// Fetch the buffer anew on every pass; rStr is appended to at the end of
// the previous pass.
p = pStart = rStr.GetBuffer();
p += nLastOffset;
while (*p)
{
if (nQuotes)
{
if (bTabSep && *p == '\t' && (nQuotes % 2) != 0)
{
// When tab-delimited, tab char ends quoted sequence
// even if we haven't reached the end quote. Doing
// this helps keep mal-formed rows from damaging
// other, well-formed rows.
nQuotes = 0;
break;
}
// Once an opening quote was seen, every unescaped quote character is
// counted, wherever it occurs. A doubled quote adds two and therefore
// leaves the parity unchanged.
if (*p == cFieldQuote && !bBackslashEscaped)
++nQuotes;
else if (bAllowBackslashEscape)
{
// A backslash toggles the escape state (so "\\" cancels itself out);
// any other character clears it.
if (*p == '\\')
bBackslashEscaped = !bBackslashEscaped;
else
bBackslashEscaped = false;
}
}
// No opening quote so far: a quote only counts as opening if it is the
// first character of the string or directly follows a separator. The
// p == pStart test comes first so that p[-1] is never read before the
// start of the buffer.
else if (*p == cFieldQuote && (p == pStart ||
lcl_UnicodeStrChr( pSeps, p[-1])))
nQuotes = 1;
// A quote character inside a field content does not start
// a quote.
++p;
}
// Even count (including 0): no quoted field is open, the line is complete.
if (nQuotes % 2 == 0)
break;
else
{
// A quoted field is still open: remember where the already scanned text
// ends, then append a line feed plus the next physical line.
nLastOffset = rStr.Len();
String aNext;
ReadUniOrByteStringLine(aNext, GetStreamCharSet());
rStr += sal_Unicode(_LF);
rStr += aNext;
}
}
}
}
/*************************************************************************
|*
|* Stream::SeekRel()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment