extend unit test for INetMIME::scanContentType

This reverts parts of commit 631b6795 and commit abc6071b. Some of the removed fields are useful; m_bConverted should be checked by callers. Fixed 2 bugs and added test cases: * extended attributes with more than 2 sections were not parsed * extended attributes with more than 1 section were not parsed if there was another attribute. Change-Id: I61ab2af7c5151ef1bcd80cc159fa2b99559374a8 Reviewed-on: https://gerrit.libreoffice.org/36913 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Stephan Bergmann <sbergman@redhat.com>

extend unit test for INetMIME::scanContentType
This reverts parts of commit 631b6795 and commit abc6071b. Some of the removed fields are useful; m_bConverted should be checked by callers. Fixed 2 bugs and added test cases: * extended attributes with more than 2 sections were not parsed * extended attributes with more than 1 section were not parsed if there was another attribute. Change-Id: I61ab2af7c5151ef1bcd80cc159fa2b99559374a8 Reviewed-on: https://gerrit.libreoffice.org/36913 Tested-by: Jenkins <ci@libreoffice.org> Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
bef9fe6e · Jochen Nitschke · Stephan Bergmann · 925ed700 · bef9fe6e · bef9fe6e
Kaydet (Commit) bef9fe6e authored Nis 18, 2017 tarafından Jochen Nitschke Kaydeden (commit) Stephan Bergmann Nis 26, 2017
Show whitespace changes
Inline Side-by-side

Showing with 128 additions and 17 deletions

inetmime.hxx include/tools/inetmime.hxx +20 -1

test_inetmime.cxx tools/qa/cppunit/test_inetmime.cxx +95 -8

inetmime.cxx tools/source/inet/inetmime.cxx +13 -8

No files found.
--- a/include/tools/inetmime.hxx
+++ b/include/tools/inetmime.hxx
@@ -30,6 +30,16 @@
 struct INetContentTypeParameter
 {
+    /** The optional character set specification (see RFC 2231), in US-ASCII
+        encoding and converted to lower case.
+     */
+    OString m_sCharset;
+    /** The optional language specification (see RFC 2231), in US-ASCII
+        encoding and converted to lower case.
+     */
+    OString m_sLanguage;
    /** The attribute value.  If the value is a quoted-string, it is
        'unpacked.'  If a character set is specified, and the value can be
        converted to Unicode, this is done.  Also, if no character set is
@@ -49,9 +59,18 @@ struct INetContentTypeParameter
     */
    OUString m_sValue;
+    /** This is true if the value is successfully converted to Unicode, and
+        false if the value is a special mixture of ISO-LATIN-1 characters and
+        characters from Unicode's Private Use Area.
+     */
+    bool m_bConverted;
 };
-// the key is the m_sAttribute again; all keys are lower case:
+/** The key is the name of the attribute, in US-ASCII encoding and converted
+    to lower case.  If a parameter value is split as described in RFC 2231,
+    there will only be one item for the complete parameter, with the attribute
+    name lacking any section suffix.
+ */
 typedef std::unordered_map<OString, INetContentTypeParameter, OStringHash>
    INetContentTypeParameterList;

--- a/tools/qa/cppunit/test_inetmime.cxx
+++ b/tools/qa/cppunit/test_inetmime.cxx
@@ -35,11 +35,13 @@ namespace
    public:
        void test_decodeHeaderFieldBody();
-        void test_scanContentType();
+        void test_scanContentType_basic();
+        void test_scanContentType_rfc2231();
        CPPUNIT_TEST_SUITE(Test);
        CPPUNIT_TEST(test_decodeHeaderFieldBody);
-        CPPUNIT_TEST(test_scanContentType);
+        CPPUNIT_TEST(test_scanContentType_basic);
+        CPPUNIT_TEST(test_scanContentType_rfc2231);
        CPPUNIT_TEST_SUITE_END();
    };
@@ -56,17 +58,61 @@ namespace
        CPPUNIT_ASSERT(testDecode("=?iso-8859-1?B?QUJD?=", "ABC"));
    }
-    void Test::test_scanContentType()
+    void Test::test_scanContentType_basic()
    {
        {
            OUString input
-                = "TEST/subTST; parm1*0*=US-ASCII'En'5%25%20; Parm1*1*=of%2010";
+                = "TEST/subTST; parm1=Value1; Parm2=\"unpacked value; %20\"";
+            // Just scan input for valid string:
+            auto end = INetMIME::scanContentType(input.getStr(), input.getStr()+input.getLength());
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            // Scan input and parse type, subType and parameters:
+            OUString type;
+            OUString subType;
+            INetContentTypeParameterList parameters;
+            end = INetMIME::scanContentType(input.getStr(), input.getStr() + input.getLength(),
+                                            &type, &subType, &parameters);
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            CPPUNIT_ASSERT_EQUAL(OUString("test"), type);
+            CPPUNIT_ASSERT_EQUAL(OUString("subtst"), subType);
+            CPPUNIT_ASSERT_EQUAL(
+                INetContentTypeParameterList::size_type(2), parameters.size());
+            auto i = parameters.find("parm1");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sLanguage);
+            CPPUNIT_ASSERT_EQUAL(OUString("Value1"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+            i = parameters.find("parm2");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString(), i->second.m_sLanguage);
+            CPPUNIT_ASSERT_EQUAL(OUString("unpacked value; %20"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+        }
+    }
+    void Test::test_scanContentType_rfc2231()
+    {
+        {
+            // Test extended parameter with value split in 3 sections:
+            OUString input
+                = "TEST/subTST; "
+                  "parm1*0*=US-ASCII'En'5%25%20; "
+                  "Parm1*1*=of%2010;\t"
+                  "parm1*2*=%20%3d%200.5";
+            // Just scan input for valid string:
+            auto end = INetMIME::scanContentType(input.getStr(), input.getStr()+input.getLength());
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            // Scan input and parse type, subType and parameters:
            OUString type;
            OUString subType;
            INetContentTypeParameterList parameters;
-            auto end = INetMIME::scanContentType(
+            end = INetMIME::scanContentType(input.getStr(), input.getStr() + input.getLength(),
-                input.getStr(), input.getStr() + input.getLength(), &type,
+                                            &type, &subType, &parameters);
-                &subType, &parameters);
            CPPUNIT_ASSERT(end != nullptr);
            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
            CPPUNIT_ASSERT_EQUAL(OUString("test"), type);
@@ -75,7 +121,48 @@ namespace
                INetContentTypeParameterList::size_type(1), parameters.size());
            auto i = parameters.find("parm1");
            CPPUNIT_ASSERT(i != parameters.end());
-            CPPUNIT_ASSERT_EQUAL(OUString("5% of 10"), i->second.m_sValue);
+            CPPUNIT_ASSERT_EQUAL(OString("us-ascii"), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString("en"), i->second.m_sLanguage);
+            CPPUNIT_ASSERT_EQUAL(OUString("5% of 10 = 0.5"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+            // Test extended parameters with different value charsets:
+            input = "TEST/subTST;"
+                    "parm1*0*=us-ascii'en'value;PARM1*1*=1;"
+                    "parm2*0*=WINDOWS-1250'en-GB'value2%20%80;"
+                    "parm3*0*=UNKNOWN'EN'value3";
+            // Just scan input for valid string:
+            end = INetMIME::scanContentType(input.getStr(), input.getStr()+input.getLength());
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            // Scan input and parse type, subType and parameters:
+            end = INetMIME::scanContentType(input.getStr(), input.getStr() + input.getLength(),
+                                            &type, &subType, &parameters);
+            CPPUNIT_ASSERT(end != nullptr);
+            CPPUNIT_ASSERT_EQUAL(OUString(), OUString(end));
+            CPPUNIT_ASSERT_EQUAL(OUString("test"), type);
+            CPPUNIT_ASSERT_EQUAL(OUString("subtst"), subType);
+            CPPUNIT_ASSERT_EQUAL(
+                INetContentTypeParameterList::size_type(3), parameters.size());
+            i = parameters.find("parm1");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString("us-ascii"), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString("en"), i->second.m_sLanguage);
+            CPPUNIT_ASSERT_EQUAL(OUString("value1"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+            i = parameters.find("parm2");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString("windows-1250"), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString("en-gb"), i->second.m_sLanguage);
+            // Euro currency sign, windows-1250 x80 is converted to unicode u20AC:
+            CPPUNIT_ASSERT_EQUAL(OUString(u"value2 \u20AC"), i->second.m_sValue);
+            CPPUNIT_ASSERT(i->second.m_bConverted);
+            i = parameters.find("parm3");
+            CPPUNIT_ASSERT(i != parameters.end());
+            CPPUNIT_ASSERT_EQUAL(OString("unknown"), i->second.m_sCharset);
+            CPPUNIT_ASSERT_EQUAL(OString("en"), i->second.m_sLanguage);
+            // Convertion fails for unknown charsets:
+            CPPUNIT_ASSERT(!i->second.m_bConverted);
        }
    }

--- a/tools/source/inet/inetmime.cxx
+++ b/tools/source/inet/inetmime.cxx
@@ -369,12 +369,14 @@ struct Parameter
    Parameter * m_pNext;
    OString m_aAttribute;
    OString m_aCharset;
+    OString m_aLanguage;
    OString m_aValue;
    sal_uInt32 m_nSection;
    bool m_bExtended;
    inline Parameter(Parameter * pTheNext, const OString& rTheAttribute,
                     const OString& rTheCharset,
+                     const OString& rTheLanguage,
                     const OString& rTheValue, sal_uInt32 nTheSection,
                     bool bTheExtended);
 };
@@ -382,11 +384,13 @@ struct Parameter
 inline Parameter::Parameter(Parameter * pTheNext,
                            const OString& rTheAttribute,
                            const OString& rTheCharset,
+                            const OString& rTheLanguage,
                            const OString& rTheValue,
                            sal_uInt32 nTheSection, bool bTheExtended):
    m_pNext(pTheNext),
    m_aAttribute(rTheAttribute),
    m_aCharset(rTheCharset),
+    m_aLanguage(rTheLanguage),
    m_aValue(rTheValue),
    m_nSection(nTheSection),
    m_bExtended(bTheExtended)
@@ -439,16 +443,16 @@ Parameter ** ParameterList::find(const OString& rAttribute,
    for (; *p; p = &(*p)->m_pNext)
    {
        sal_Int32 nCompare = rAttribute.compareTo((*p)->m_aAttribute);
-        if (nCompare > 0)
+        if (nCompare < 0)
-            return &(*p)->m_pNext;
+            break;
        else if (nCompare == 0)
        {
-            if (nSection > (*p)->m_nSection)
+            if (nSection < (*p)->m_nSection)
-                return &(*p)->m_pNext;
+                break;
            else if (nSection == (*p)->m_nSection)
            {
                rPresent = true;
-                return p;
+                break;
            }
        }
    }
@@ -537,8 +541,9 @@ bool parseParameters(ParameterList const & rInput,
                        break;
                };
            }
-            INetContentTypeParameter x {aValue}; // workaround ICE in VisualStudio2013
+            auto const ret = pOutput->insert(
-            auto const ret = pOutput->insert({p->m_aAttribute, x });
+                {p->m_aAttribute,
+                 {p->m_aCharset, p->m_aLanguage, aValue, !bBadEncoding}});
            SAL_INFO_IF(!ret.second, "tools",
                "INetMIME: dropping duplicate parameter: " << p->m_aAttribute);
            p = pNext;
@@ -877,7 +882,7 @@ sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
                    RTL_TEXTENCODING_UTF8);
        }
-        *pPos = new Parameter(*pPos, aAttribute, aCharset, aValue,
+        *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue,
                              nSection, bExtended);
    }
    return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;