Commit d739b01b, authored by Stephan Bergmann

Adapt rtl_uriConvertRelToAbs to RFC 3986

...which updates RFC 2396, removes the requirement that the base URI's path
starts with a slash, and clarifies how to treat excess "." and ".." segments.

This nicely allows handling of those odd vnd.sun.star.Package URLs as intended
now, so that making <foo> absolute relative to base URL
<vnd.sun.star.Package:Pictures/bar> yields <vnd.sun.star.Package:Pictures/foo>
instead of provoking a MalformedUriException.

Change-Id: Ice84303a57698a2c05d3a45541fe78b67450fa3c
parent 58f03356
...@@ -294,39 +294,27 @@ SAL_DLLPUBLIC void SAL_CALL rtl_uriDecode( ...@@ -294,39 +294,27 @@ SAL_DLLPUBLIC void SAL_CALL rtl_uriDecode(
rtl_uString ** pResult) rtl_uString ** pResult)
SAL_THROW_EXTERN_C(); SAL_THROW_EXTERN_C();
/** Convert a relative URI reference into an absolute one. /** Convert a relative URI reference into an absolute URI.
A URI reference is a URI plus an optional @<"#" fragment> part. This function uses the strict parser algorithm described in RFC 3986,
section 5.2.
This function uses the algorithm described in RFC 2396, section 5.2, with
the following clarifications: (1) Backwards-compatible relative URIs
starting with a scheme component (see RFC 2396, section 5.2, step 3) are not
supported. (2) Segments "." and ".." within the path of the base URI are
not considered special; RFC 2396 seems a bit unclear about that point.
(3) Erroneous excess segments ".." within the path of the relative URI (if
it is indeed relative) are left intact, as the examples in RFC 2396,
section C.2, suggest. (4) If the relative URI is a reference to the
"current document," the "current document" is taken to be the base URI.
This function signals exceptions by returning false and letting pException This function signals exceptions by returning false and letting pException
point to a message explaining the exception. point to a message explaining the exception.
@param pBaseUriRef @param pBaseUriRef
An absolute, hierarchical URI reference that serves as the base URI. If it An absolute URI that serves as the base URI. If it has to be inspected
has to be inspected (i.e., pRelUriRef is not an absolute URI already), and (i.e., pRelUriRef is not an absolute URI already), and it is not an absolute
if it either is not an absolute URI (i.e., does not begin with a URI (i.e., does not begin with a @<scheme ":"> part), an exception will be
@<scheme ":"> part) or has a path that is non-empty but does not start signaled.
with "/", an exception will be signaled.
@param pRelUriRef @param pRelUriRef
A URI reference that may be either absolute or relative. If it is A URI reference that may be either absolute or relative. If it is
absolute, it will be returned unmodified (and it need not be hierarchical absolute, it will be returned unmodified.
then).
@param pResult @param pResult
Returns an absolute URI reference. Must itself not be null, and must point Returns an absolute URI. Must itself not be null, and must point to either
to either null or a valid string. If an exception is signalled, it is left null or a valid string. If an exception is signalled, it is left unchanged.
unchanged.
@param pException @param pException
Returns an explanatory message in case an exception is signalled. Must Returns an explanatory message in case an exception is signalled. Must
......
...@@ -279,14 +279,14 @@ void Test::test_Uri() { ...@@ -279,14 +279,14 @@ void Test::test_Uri() {
char const * pAbs; char const * pAbs;
}; };
static RelToAbsTest const aRelToAbsTest[] static RelToAbsTest const aRelToAbsTest[]
= { // The following tests are taken from RFC 2396: = { // The following tests are taken from RFC 3986:
{ "http://a/b/c/d;p?q", "g:h", "g:h" }, { "http://a/b/c/d;p?q", "g:h", "g:h" },
{ "http://a/b/c/d;p?q", "g", "http://a/b/c/g" }, { "http://a/b/c/d;p?q", "g", "http://a/b/c/g" },
{ "http://a/b/c/d;p?q", "./g", "http://a/b/c/g" }, { "http://a/b/c/d;p?q", "./g", "http://a/b/c/g" },
{ "http://a/b/c/d;p?q", "g/", "http://a/b/c/g/" }, { "http://a/b/c/d;p?q", "g/", "http://a/b/c/g/" },
{ "http://a/b/c/d;p?q", "/g", "http://a/g" }, { "http://a/b/c/d;p?q", "/g", "http://a/g" },
{ "http://a/b/c/d;p?q", "//g", "http://g" }, { "http://a/b/c/d;p?q", "//g", "http://g" },
{ "http://a/b/c/d;p?q", "?y", "http://a/b/c/?y" }, { "http://a/b/c/d;p?q", "?y", "http://a/b/c/d;p?y" },
{ "http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y" }, { "http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y" },
{ "http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s" }, { "http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s" },
{ "http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s" }, { "http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s" },
...@@ -294,6 +294,7 @@ void Test::test_Uri() { ...@@ -294,6 +294,7 @@ void Test::test_Uri() {
{ "http://a/b/c/d;p?q", ";x", "http://a/b/c/;x" }, { "http://a/b/c/d;p?q", ";x", "http://a/b/c/;x" },
{ "http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x" }, { "http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x" },
{ "http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s" }, { "http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s" },
{ "http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q" },
{ "http://a/b/c/d;p?q", ".", "http://a/b/c/" }, { "http://a/b/c/d;p?q", ".", "http://a/b/c/" },
{ "http://a/b/c/d;p?q", "./", "http://a/b/c/" }, { "http://a/b/c/d;p?q", "./", "http://a/b/c/" },
{ "http://a/b/c/d;p?q", "..", "http://a/b/" }, { "http://a/b/c/d;p?q", "..", "http://a/b/" },
...@@ -302,11 +303,10 @@ void Test::test_Uri() { ...@@ -302,11 +303,10 @@ void Test::test_Uri() {
{ "http://a/b/c/d;p?q", "../..", "http://a/" }, { "http://a/b/c/d;p?q", "../..", "http://a/" },
{ "http://a/b/c/d;p?q", "../../", "http://a/" }, { "http://a/b/c/d;p?q", "../../", "http://a/" },
{ "http://a/b/c/d;p?q", "../../g", "http://a/g" }, { "http://a/b/c/d;p?q", "../../g", "http://a/g" },
{ "http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q" }, { "http://a/b/c/d;p?q", "../../../g", "http://a/g" },
{ "http://a/b/c/d;p?q", "../../../g", "http://a/../g" }, { "http://a/b/c/d;p?q", "../../../../g", "http://a/g" },
{ "http://a/b/c/d;p?q", "../../../../g", "http://a/../../g" }, { "http://a/b/c/d;p?q", "/./g", "http://a/g" },
{ "http://a/b/c/d;p?q", "/./g", "http://a/./g" }, { "http://a/b/c/d;p?q", "/../g", "http://a/g" },
{ "http://a/b/c/d;p?q", "/../g", "http://a/../g" },
{ "http://a/b/c/d;p?q", "g.", "http://a/b/c/g." }, { "http://a/b/c/d;p?q", "g.", "http://a/b/c/g." },
{ "http://a/b/c/d;p?q", ".g", "http://a/b/c/.g" }, { "http://a/b/c/d;p?q", ".g", "http://a/b/c/.g" },
{ "http://a/b/c/d;p?q", "g..", "http://a/b/c/g.." }, { "http://a/b/c/d;p?q", "g..", "http://a/b/c/g.." },
...@@ -322,13 +322,15 @@ void Test::test_Uri() { ...@@ -322,13 +322,15 @@ void Test::test_Uri() {
{ "http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x" }, { "http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x" },
{ "http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x" }, { "http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x" },
{ "http://a/b/c/d;p?q", "http:g", "http:g" }, { "http://a/b/c/d;p?q", "http:g", "http:g" },
{ "http!://a/b/c/d;p?q", "g:h", "g:h" }, { "http!://a/b/c/d;p?q", "g:h", "g:h" },
{ "http!://a/b/c/d;p?q", "g", 0 }, { "http!://a/b/c/d;p?q", "g", 0 },
{ "http:b/c/d;p?q", "g:h", "g:h" }, { "http:b/c/d;p?q", "g:h", "g:h" },
{ "http:b/c/d;p?q", "g", 0 }, { "http:b/c/d;p?q", "g", "http:b/c/g" },
{ "http://a/b/../", "../c", "http://a/b/../../c" }, { "http://a/b/../", "../c", "http://a/c" },
{ "http://a/b/..", "../c", "http://a/c" }, { "http://a/b/..", "../c", "http://a/c" },
{ "http://a/./b/", ".././.././../c", "http://a/./../../c" } }; { "http://a/./b/", ".././.././../c", "http://a/c" },
{ "http://a", "b", "http://a/b" } };
for (std::size_t i = 0; i < sizeof aRelToAbsTest / sizeof (RelToAbsTest); ++i) for (std::size_t i = 0; i < sizeof aRelToAbsTest / sizeof (RelToAbsTest); ++i)
{ {
rtl::OUString aAbs; rtl::OUString aAbs;
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include "sal/types.h" #include "sal/types.h"
#include "sal/macros.h" #include "sal/macros.h"
#include <algorithm>
#include <cstddef> #include <cstddef>
namespace { namespace {
...@@ -412,81 +413,55 @@ void parseUriRef(rtl_uString const * pUriRef, Components * pComponents) ...@@ -412,81 +413,55 @@ void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
} }
} }
rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath) void appendPath(
rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
{ {
assert(rBasePath.isPresent() && *rBasePath.pBegin == '/'); while (precedingSlash || pathBegin != pathEnd) {
assert(rRelPath.isPresent()); sal_Unicode const * p = pathBegin;
while (p != pathEnd && *p != '/') {
// The invariant of aBuffer is that it always starts and ends with a slash ++p;
// (until probably right at the end of the algorithm, when the last segment
// of rRelPath is added, which does not necessarily end in a slash):
rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
// XXX numeric overflow
// Segments "." and ".." within rBasePath are not considered special (but
// are also not removed by ".." segments within rRelPath), RFC 2396 seems a
// bit unclear about this point:
sal_Int32 nFixed = 1;
sal_Unicode const * p = rBasePath.pBegin + 1;
for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
if (*q == '/')
{
if (
(q - p == 1 && p[0] == '.') ||
(q - p == 2 && p[0] == '.' && p[1] == '.')
)
{
nFixed = q + 1 - rBasePath.pBegin;
}
p = q + 1;
} }
aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin); std::size_t n = p - pathBegin;
if (n == 1 && pathBegin[0] == '.') {
p = rRelPath.pBegin; // input begins with "." -> remove from input (and done):
if (p != rRelPath.pEnd) // i.e., !precedingSlash -> !precedingSlash
for (;;) // input begins with "./" -> remove from input:
{ // i.e., !precedingSlash -> !precedingSlash
sal_Unicode const * q = p; // input begins with "/." -> replace with "/" in input (and not yet
sal_Unicode const * r; // done):
for (;;) // i.e., precedingSlash -> precedingSlash
{ // input begins with "/./" -> replace with "/" in input:
if (q == rRelPath.pEnd) // i.e., precedingSlash -> precedingSlash
{ } else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.') {
r = q; // input begins with ".." -> remove from input (and done):
break; // i.e., !precedingSlash -> !precedingSlash
} // input begins with "../" -> remove from input
if (*q == '/') // i.e., !precedingSlash -> !precedingSlash
{ // input begins with "/.." -> replace with "/" in input, and shrink
r = q + 1; // output (and not yet done):
break; // i.e., precedingSlash -> precedingSlash
} // input begins with "/../" -> replace with "/" in input, and shrink
++q; // output:
// i.e., precedingSlash -> precedingSlash
if (precedingSlash) {
buffer.truncate(
bufferStart
+ std::max<sal_Int32>(
rtl_ustr_lastIndexOfChar_WithLength(
buffer.getStr() + bufferStart,
buffer.getLength() - bufferStart, '/'),
0));
} }
if (q - p == 2 && p[0] == '.' && p[1] == '.') } else {
{ if (precedingSlash) {
// Erroneous excess segments ".." within rRelPath are left buffer.append('/');
// intact, as the examples in RFC 2396, section C.2, suggest:
sal_Int32 i = aBuffer.getLength() - 1;
if (i < nFixed)
{
aBuffer.append(p, r - p);
nFixed += 3;
}
else
{
while (i > 0 && aBuffer[i - 1] != '/')
--i;
aBuffer.setLength(i);
}
} }
else if (q - p != 1 || *p != '.') buffer.append(pathBegin, n);
aBuffer.append(p, r - p); precedingSlash = p != pathEnd;
if (q == rRelPath.pEnd)
break;
p = q + 1;
} }
pathBegin = p + (p == pathEnd ? 0 : 1);
return aBuffer.makeStringAndClear(); }
} }
} }
...@@ -689,87 +664,101 @@ sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef, ...@@ -689,87 +664,101 @@ sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
rtl_uString ** pException) rtl_uString ** pException)
SAL_THROW_EXTERN_C() SAL_THROW_EXTERN_C()
{ {
// If pRelUriRef starts with a scheme component it is an absolute URI // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
// reference, and we are done (i.e., this algorithm does not support // relative URI into an absolute one:
// backwards-compatible relative URIs starting with a scheme component, see rtl::OUStringBuffer aBuffer;
// RFC 2396, section 5.2, step 3):
Components aRelComponents; Components aRelComponents;
parseUriRef(pRelUriRef, &aRelComponents); parseUriRef(pRelUriRef, &aRelComponents);
if (aRelComponents.aScheme.isPresent()) if (aRelComponents.aScheme.isPresent())
{ {
rtl_uString_assign(pResult, pRelUriRef); aBuffer.append(aRelComponents.aScheme.pBegin,
return true; aRelComponents.aScheme.getLength());
} if (aRelComponents.aAuthority.isPresent())
aBuffer.append(aRelComponents.aAuthority.pBegin,
// Parse pBaseUriRef; if the scheme component is not present or not valid, aRelComponents.aAuthority.getLength());
// or the path component is not empty and starts with anything but a slash, appendPath(
// an exception is raised: aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
Components aBaseComponents; aRelComponents.aPath.pEnd);
parseUriRef(pBaseUriRef, &aBaseComponents);
if (!aBaseComponents.aScheme.isPresent())
{
rtl_uString_assign(
pException,
(rtl::OUString(
"<" + rtl::OUString(pBaseUriRef)
+ "> does not start with a scheme component")
.pData));
return false;
}
if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
&& *aBaseComponents.aPath.pBegin != '/')
{
rtl_uString_assign(
pException,
(rtl::OUString(
"<" + rtl::OUString(pBaseUriRef)
+ "> path component does not start with a slash")
.pData));
return false;
}
// Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
// into an absolute one (if the relative URI is a reference to the "current
// document," the "current document" is here taken to be the base URI):
rtl::OUStringBuffer aBuffer;
aBuffer.append(aBaseComponents.aScheme.pBegin,
aBaseComponents.aScheme.getLength());
if (aRelComponents.aAuthority.isPresent())
{
aBuffer.append(aRelComponents.aAuthority.pBegin,
aRelComponents.aAuthority.getLength());
aBuffer.append(aRelComponents.aPath.pBegin,
aRelComponents.aPath.getLength());
if (aRelComponents.aQuery.isPresent()) if (aRelComponents.aQuery.isPresent())
aBuffer.append(aRelComponents.aQuery.pBegin, aBuffer.append(aRelComponents.aQuery.pBegin,
aRelComponents.aQuery.getLength()); aRelComponents.aQuery.getLength());
} }
else else
{ {
if (aBaseComponents.aAuthority.isPresent()) Components aBaseComponents;
aBuffer.append(aBaseComponents.aAuthority.pBegin, parseUriRef(pBaseUriRef, &aBaseComponents);
aBaseComponents.aAuthority.getLength()); if (!aBaseComponents.aScheme.isPresent())
if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
&& !aRelComponents.aQuery.isPresent())
{ {
aBuffer.append(aBaseComponents.aPath.pBegin, rtl_uString_assign(
aBaseComponents.aPath.getLength()); pException,
if (aBaseComponents.aQuery.isPresent()) (rtl::OUString(
aBuffer.append(aBaseComponents.aQuery.pBegin, "<" + rtl::OUString(pBaseUriRef)
aBaseComponents.aQuery.getLength()); + "> does not start with a scheme component")
.pData));
return false;
} }
else aBuffer.append(aBaseComponents.aScheme.pBegin,
aBaseComponents.aScheme.getLength());
if (aRelComponents.aAuthority.isPresent())
{ {
if (*aRelComponents.aPath.pBegin == '/') aBuffer.append(aRelComponents.aAuthority.pBegin,
aBuffer.append(aRelComponents.aPath.pBegin, aRelComponents.aAuthority.getLength());
aRelComponents.aPath.getLength()); appendPath(
else aBuffer, aBuffer.getLength(), false,
aBuffer.append(joinPaths(aBaseComponents.aPath, aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
aRelComponents.aPath));
if (aRelComponents.aQuery.isPresent()) if (aRelComponents.aQuery.isPresent())
aBuffer.append(aRelComponents.aQuery.pBegin, aBuffer.append(aRelComponents.aQuery.pBegin,
aRelComponents.aQuery.getLength()); aRelComponents.aQuery.getLength());
} }
else
{
if (aBaseComponents.aAuthority.isPresent())
aBuffer.append(aBaseComponents.aAuthority.pBegin,
aBaseComponents.aAuthority.getLength());
if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
{
aBuffer.append(aBaseComponents.aPath.pBegin,
aBaseComponents.aPath.getLength());
if (aRelComponents.aQuery.isPresent())
aBuffer.append(aRelComponents.aQuery.pBegin,
aRelComponents.aQuery.getLength());
else if (aBaseComponents.aQuery.isPresent())
aBuffer.append(aBaseComponents.aQuery.pBegin,
aBaseComponents.aQuery.getLength());
}
else
{
if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd
&& *aRelComponents.aPath.pBegin == '/')
appendPath(
aBuffer, aBuffer.getLength(), false,
aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
else if (aBaseComponents.aAuthority.isPresent()
&& aBaseComponents.aPath.pBegin
== aBaseComponents.aPath.pEnd)
appendPath(
aBuffer, aBuffer.getLength(), true,
aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
else
{
sal_Int32 n = aBuffer.getLength();
sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
aBaseComponents.aPath.pBegin,
aBaseComponents.aPath.getLength(), '/');
if (i >= 0) {
appendPath(
aBuffer, n, false, aBaseComponents.aPath.pBegin,
aBaseComponents.aPath.pBegin + i);
}
appendPath(
aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
aRelComponents.aPath.pEnd);
}
if (aRelComponents.aQuery.isPresent())
aBuffer.append(aRelComponents.aQuery.pBegin,
aRelComponents.aQuery.getLength());
}
}
} }
if (aRelComponents.aFragment.isPresent()) if (aRelComponents.aFragment.isPresent())
aBuffer.append(aRelComponents.aFragment.pBegin, aBuffer.append(aRelComponents.aFragment.pBegin,
......
...@@ -238,11 +238,7 @@ namespace svgio ...@@ -238,11 +238,7 @@ namespace svgio
try { try {
aAbsUrl = rtl::Uri::convertRelToAbs(rPath, maUrl); aAbsUrl = rtl::Uri::convertRelToAbs(rPath, maUrl);
} catch (rtl::MalformedUriException & e) { } catch (rtl::MalformedUriException & e) {
// Happens for the odd rPath = SAL_WARN(
// "vnd.sun.star.Package:Pictures/..." scheme using
// path components not starting with a slash by mis-
// design:
SAL_INFO(
"svg", "svg",
"caught rtl::MalformedUriException \"" "caught rtl::MalformedUriException \""
<< e.getMessage() << "\""); << e.getMessage() << "\"");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment