2019-11-23 20:27:39 +08:00
|
|
|
/*
|
|
|
|
* Copyright 2017 Facebook, Inc.
|
|
|
|
* Copyright (c) 2017 Chukong Technologies
|
|
|
|
* Copyright (c) 2017-2018 Xiamen Yaji Software Co., Ltd.
|
2021-06-22 14:19:22 +08:00
|
|
|
* Copyright (c) 2021 Bytedance Inc.
|
2019-11-23 20:27:39 +08:00
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
* Uri class is based on the original file here https://github.com/facebook/folly/blob/master/folly/Uri.cpp
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "network/Uri.h"
|
2023-06-11 13:08:08 +08:00
|
|
|
#include "base/Console.h" // For AXLOGERROR macro
|
2019-11-23 20:27:39 +08:00
|
|
|
|
|
|
|
#include <regex>
|
|
|
|
#include <sstream>
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
// Branch-prediction hint macros. Any earlier definitions are discarded first so
// that the versions below are the ones in effect for the rest of this file.
#undef LIKELY
#undef UNLIKELY

#if !defined(__GNUC__) || __GNUC__ < 4
// Not a GNU-compatible compiler (or older than 4): the hints reduce to the bare expression.
#    define LIKELY(x) (x)
#    define UNLIKELY(x) (x)
#else
// GNU-compatible compiler, version 4 or later: forward the expected outcome to __builtin_expect.
#    define LIKELY(x) (__builtin_expect((x), 1))
#    define UNLIKELY(x) (__builtin_expect((x), 0))
#endif
|
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
namespace
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
/**
 * Formats a value as text by inserting it into a string stream.
 *
 * @param arg Value to format; it must support `operator<<` on a std::ostream.
 * @return The characters the stream produced for `arg`.
 */
template <typename T>
std::string toString(T arg)
{
    std::ostringstream formatter;
    formatter << arg;
    return formatter.str();
}
|
|
|
|
|
|
|
|
/**
 * Copies the text captured by one group of a regex match.
 *
 * @param m   Match results to read from.
 * @param idx Index of the capture group (0 is the whole match).
 * @return The captured characters, or an empty string when the group captured nothing.
 */
std::string submatch(const std::smatch& m, int idx)
{
    return m[idx].str();
}
|
|
|
|
|
|
|
|
/**
 * Lower-cases every character of `s` in place using the C library's tolower().
 *
 * @param s String-like container of characters; modified in place.
 */
template <class String>
void toLower(String& s)
{
    for (auto&& c : s)
    {
        // The <ctype.h> functions require an argument representable as unsigned char
        // (or EOF). Passing a plain char that happens to be negative (any byte >= 0x80
        // where char is signed) is undefined behaviour, so convert first.
        c = char(tolower(static_cast<unsigned char>(c)));
    }
}
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
2022-07-11 17:50:21 +08:00
|
|
|
NS_AX_BEGIN
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
namespace network
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
|
|
|
|
2022-05-23 21:55:30 +08:00
|
|
|
Uri::Uri() : _isValid(false), _isSecure(false), _hasAuthority(false), _isCustomPort(false), _port(0) {}
|
2019-11-23 20:27:39 +08:00
|
|
|
|
|
|
|
// Copy constructor: delegates all member copying to the copy-assignment operator.
Uri::Uri(const Uri& o)
{
    this->operator=(o);
}
|
|
|
|
|
|
|
|
// Move constructor: delegates to the move-assignment operator, which transfers
// the members of `o` into this object and resets the flags and port of `o`.
Uri::Uri(Uri&& o)
{
    this->operator=(std::move(o));
}
|
|
|
|
|
|
|
|
/**
 * Copy assignment: makes this object a member-wise copy of `o`.
 *
 * @param o Source URI; left untouched.
 * @return Reference to this object.
 */
Uri& Uri::operator=(const Uri& o)
{
    // Self-assignment: nothing to copy.
    if (this == &o)
    {
        return *this;
    }

    // Flags and port.
    _isValid      = o._isValid;
    _isSecure     = o._isSecure;
    _hasAuthority = o._hasAuthority;
    _isCustomPort = o._isCustomPort;
    _port         = o._port;

    // Textual components.
    _scheme    = o._scheme;
    _username  = o._username;
    _password  = o._password;
    _host      = o._host;
    _hostName  = o._hostName;
    _authority = o._authority;
    _pathEtc   = o._pathEtc;
    _path      = o._path;
    _query     = o._query;
    _fragment  = o._fragment;

    // Query parameter list.
    _queryParams = o._queryParams;

    return *this;
}
|
|
|
|
|
|
|
|
/**
 * Move assignment: takes over the contents of `o`.
 *
 * The strings and the query parameter list are moved; the flags and the port are
 * copied and then reset on `o` (flags to false, port to 0).
 *
 * @param o Source URI to take the contents from.
 * @return Reference to this object.
 */
Uri& Uri::operator=(Uri&& o)
{
    // Self-assignment: leave everything as it is.
    if (this == &o)
    {
        return *this;
    }

    // Transfer the textual components and the parameter list.
    _scheme      = std::move(o._scheme);
    _username    = std::move(o._username);
    _password    = std::move(o._password);
    _host        = std::move(o._host);
    _hostName    = std::move(o._hostName);
    _authority   = std::move(o._authority);
    _pathEtc     = std::move(o._pathEtc);
    _path        = std::move(o._path);
    _query       = std::move(o._query);
    _fragment    = std::move(o._fragment);
    _queryParams = std::move(o._queryParams);

    // Copy the scalar state...
    _isValid      = o._isValid;
    _isSecure     = o._isSecure;
    _hasAuthority = o._hasAuthority;
    _isCustomPort = o._isCustomPort;
    _port         = o._port;

    // ...and reset it on the source.
    o._isValid      = false;
    o._isSecure     = false;
    o._hasAuthority = false;
    o._isCustomPort = false;
    o._port         = 0;

    return *this;
}
|
|
|
|
|
|
|
|
bool Uri::operator==(const Uri& o) const
|
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
return (_isValid == o._isValid && _isSecure == o._isSecure && _scheme == o._scheme && _username == o._username &&
|
|
|
|
_password == o._password && _host == o._host && _hostName == o._hostName &&
|
|
|
|
_hasAuthority == o._hasAuthority && _port == o._port && _authority == o._authority &&
|
|
|
|
_pathEtc == o._pathEtc && _path == o._path && _query == o._query && _fragment == o._fragment &&
|
|
|
|
_queryParams == o._queryParams);
|
2019-11-23 20:27:39 +08:00
|
|
|
}
|
|
|
|
|
2021-12-31 12:12:40 +08:00
|
|
|
/**
 * Builds a Uri from its textual form.
 *
 * @param str Text to parse.
 * @return The parsed Uri. If parsing fails, the returned object has been reset
 *         with clear().
 */
Uri Uri::parse(std::string_view str)
{
    Uri result;

    const bool parsedOk = result.doParse(str);
    if (!parsedOk)
    {
        // Discard whatever doParse() filled in before it gave up.
        result.clear();
    }

    return result;
}
|
|
|
|
|
2021-12-31 12:12:40 +08:00
|
|
|
bool Uri::doParse(std::string_view str)
|
2019-11-23 20:27:39 +08:00
|
|
|
{
|
|
|
|
static const std::regex uriRegex(
|
2021-12-25 10:04:45 +08:00
|
|
|
"([a-zA-Z][a-zA-Z0-9+.-]*):" // scheme:
|
|
|
|
"([^?#]*)" // authority and path
|
|
|
|
"(?:\\?([^#]*))?" // ?query
|
|
|
|
"(?:#(.*))?"); // #fragment
|
2019-11-23 20:27:39 +08:00
|
|
|
static const std::regex authorityAndPathRegex("//([^/]*)(/.*)?");
|
|
|
|
|
|
|
|
if (str.empty())
|
|
|
|
{
|
2022-07-16 10:43:05 +08:00
|
|
|
AXLOGERROR("%s", "Empty URI is invalid!");
|
2019-11-23 20:27:39 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
bool hasScheme = true;
|
2023-01-04 21:26:01 +08:00
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
std::string copied(str);
|
|
|
|
if (copied.find("://") == std::string::npos)
|
|
|
|
{
|
|
|
|
hasScheme = false;
|
2021-12-25 10:04:45 +08:00
|
|
|
copied.insert(0, "abc://"); // Just make regex happy.
|
2019-11-23 20:27:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
std::smatch match;
|
2021-12-25 10:04:45 +08:00
|
|
|
if (UNLIKELY(!std::regex_match(copied.cbegin(), copied.cend(), match, uriRegex)))
|
|
|
|
{
|
2022-07-16 10:43:05 +08:00
|
|
|
AXLOGERROR("Invalid URI: %s", str.data());
|
2019-11-23 20:27:39 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string authorityAndPath(match[2].first, match[2].second);
|
|
|
|
std::smatch authorityAndPathMatch;
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!std::regex_match(authorityAndPath.cbegin(), authorityAndPath.cend(), authorityAndPathMatch,
|
|
|
|
authorityAndPathRegex))
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
// Does not start with //, doesn't have authority
|
|
|
|
_hasAuthority = false;
|
2021-12-25 10:04:45 +08:00
|
|
|
_path = authorityAndPath;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
static const std::regex authorityRegex(
|
|
|
|
"(?:([^@:]*)(?::([^@]*))?@)?" // username, password
|
|
|
|
"(\\[[^\\]]*\\]|[^\\[:]*)" // host (IP-literal (e.g. '['+IPv6+']',
|
|
|
|
// dotted-IPv4, or named host)
|
|
|
|
"(?::(\\d*))?"); // port
|
|
|
|
|
|
|
|
auto authority = authorityAndPathMatch[1];
|
|
|
|
std::smatch authorityMatch;
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!std::regex_match(authority.first, authority.second, authorityMatch, authorityRegex))
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
std::string invalidAuthority(authority.first, authority.second);
|
2022-07-16 10:43:05 +08:00
|
|
|
AXLOGERROR("Invalid URI authority: %s", invalidAuthority.c_str());
|
2019-11-23 20:27:39 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string port(authorityMatch[4].first, authorityMatch[4].second);
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!port.empty())
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
_port = static_cast<uint16_t>(atoi(port.c_str()));
|
|
|
|
}
|
|
|
|
|
|
|
|
_hasAuthority = true;
|
2021-12-25 10:04:45 +08:00
|
|
|
_username = submatch(authorityMatch, 1);
|
|
|
|
_password = submatch(authorityMatch, 2);
|
|
|
|
_host = submatch(authorityMatch, 3);
|
|
|
|
_path = submatch(authorityAndPathMatch, 2);
|
2019-11-23 20:27:39 +08:00
|
|
|
}
|
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
_query = submatch(match, 3);
|
2019-11-23 20:27:39 +08:00
|
|
|
_fragment = submatch(match, 4);
|
2021-12-25 10:04:45 +08:00
|
|
|
_isValid = true;
|
2019-11-23 20:27:39 +08:00
|
|
|
|
|
|
|
// Assign authority part
|
|
|
|
//
|
|
|
|
// Port is 5 characters max and we have up to 3 delimiters.
|
|
|
|
_authority.reserve(getHost().size() + getUserName().size() + getPassword().size() + 8);
|
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!getUserName().empty() || !getPassword().empty())
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
_authority.append(getUserName());
|
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!getPassword().empty())
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
_authority.push_back(':');
|
|
|
|
_authority.append(getPassword());
|
|
|
|
}
|
|
|
|
|
|
|
|
_authority.push_back('@');
|
|
|
|
}
|
|
|
|
|
|
|
|
_authority.append(getHost());
|
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
if (getPort() != 0)
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
_authority.push_back(':');
|
|
|
|
_authority.append(::toString(getPort()));
|
|
|
|
}
|
|
|
|
|
2023-01-04 21:26:01 +08:00
|
|
|
// Ensure path can be use for http request directly
|
|
|
|
if (_path.empty())
|
|
|
|
_path.push_back('/');
|
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
// Assign path etc part
|
|
|
|
_pathEtc = _path;
|
|
|
|
if (!_query.empty())
|
|
|
|
{
|
|
|
|
_pathEtc += '?';
|
|
|
|
_pathEtc += _query;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!_fragment.empty())
|
|
|
|
{
|
|
|
|
_pathEtc += '#';
|
|
|
|
_pathEtc += _fragment;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Assign host name
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!_host.empty() && _host[0] == '[')
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
// If it starts with '[', then it should end with ']', this is ensured by
|
|
|
|
// regex
|
|
|
|
_hostName = _host.substr(1, _host.size() - 2);
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
_hostName = _host;
|
|
|
|
}
|
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
if (hasScheme)
|
|
|
|
{
|
2021-06-22 14:19:22 +08:00
|
|
|
_scheme = submatch(match, 1);
|
|
|
|
toLower(_scheme);
|
2021-12-25 10:04:45 +08:00
|
|
|
if (_scheme == "https" || _scheme == "wss")
|
|
|
|
{
|
2021-06-22 14:19:22 +08:00
|
|
|
_isSecure = true;
|
|
|
|
if (_port == 0)
|
|
|
|
_port = 443;
|
2022-05-23 21:55:30 +08:00
|
|
|
|
|
|
|
_isCustomPort = _port != 443;
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
else if (_scheme == "http" || _scheme == "ws")
|
|
|
|
{
|
2021-06-22 14:19:22 +08:00
|
|
|
if (_port == 0)
|
|
|
|
_port = 80;
|
2022-05-23 21:55:30 +08:00
|
|
|
|
|
|
|
_isCustomPort = _port != 80;
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
else if (_scheme == "ftp")
|
|
|
|
{
|
2021-06-22 14:19:22 +08:00
|
|
|
if (_port == 0)
|
|
|
|
_port = 21;
|
2022-05-23 21:55:30 +08:00
|
|
|
|
|
|
|
_isCustomPort = _port != 21;
|
2021-06-22 14:19:22 +08:00
|
|
|
}
|
|
|
|
}
|
2022-05-23 21:55:30 +08:00
|
|
|
else
|
|
|
|
_isCustomPort = _port != 0;
|
2021-06-22 14:19:22 +08:00
|
|
|
|
2019-11-23 20:27:39 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Uri::clear()
|
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
_isValid = false;
|
2019-11-23 20:27:39 +08:00
|
|
|
_isSecure = false;
|
|
|
|
_scheme.clear();
|
|
|
|
_username.clear();
|
|
|
|
_password.clear();
|
|
|
|
_host.clear();
|
|
|
|
_hostName.clear();
|
|
|
|
_hasAuthority = false;
|
2022-05-23 21:55:30 +08:00
|
|
|
_isCustomPort = false;
|
2021-12-25 10:04:45 +08:00
|
|
|
_port = 0;
|
2019-11-23 20:27:39 +08:00
|
|
|
_authority.clear();
|
|
|
|
_pathEtc.clear();
|
|
|
|
_path.clear();
|
|
|
|
_query.clear();
|
|
|
|
_fragment.clear();
|
|
|
|
_queryParams.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
const std::vector<std::pair<std::string, std::string>>& Uri::getQueryParams()
|
|
|
|
{
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!_query.empty() && _queryParams.empty())
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
// Parse query string
|
|
|
|
static const std::regex queryParamRegex(
|
|
|
|
"(^|&)" /*start of query or start of parameter "&"*/
|
|
|
|
"([^=&]*)=?" /*parameter name and "=" if value is expected*/
|
|
|
|
"([^=&]*)" /*parameter value*/
|
|
|
|
"(?=(&|$))" /*forward reference, next should be end of query or
|
|
|
|
start of next parameter*/);
|
2021-12-25 10:04:45 +08:00
|
|
|
std::cregex_iterator paramBeginItr(_query.data(), _query.data() + _query.size(), queryParamRegex);
|
2019-11-23 20:27:39 +08:00
|
|
|
std::cregex_iterator paramEndItr;
|
2021-12-25 10:04:45 +08:00
|
|
|
for (auto itr = paramBeginItr; itr != paramEndItr; itr++)
|
|
|
|
{
|
|
|
|
if (itr->length(2) == 0)
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
// key is empty, ignore it
|
|
|
|
continue;
|
|
|
|
}
|
2021-12-25 10:04:45 +08:00
|
|
|
_queryParams.emplace_back(std::string((*itr)[2].first, (*itr)[2].second), // parameter name
|
|
|
|
std::string((*itr)[3].first, (*itr)[3].second) // parameter value
|
2019-11-23 20:27:39 +08:00
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return _queryParams;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string Uri::toString() const
|
|
|
|
{
|
|
|
|
std::stringstream ss;
|
2021-12-25 10:04:45 +08:00
|
|
|
if (_hasAuthority)
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
ss << _scheme << "://";
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!_password.empty())
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
ss << _username << ":" << _password << "@";
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
else if (!_username.empty())
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
ss << _username << "@";
|
|
|
|
}
|
|
|
|
ss << _host;
|
2022-05-23 21:55:30 +08:00
|
|
|
if (_isCustomPort)
|
2021-12-25 10:04:45 +08:00
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
ss << ":" << _port;
|
|
|
|
}
|
2021-12-25 10:04:45 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
ss << _scheme << ":";
|
|
|
|
}
|
|
|
|
ss << _path;
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!_query.empty())
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
ss << "?" << _query;
|
|
|
|
}
|
2021-12-25 10:04:45 +08:00
|
|
|
if (!_fragment.empty())
|
|
|
|
{
|
2019-11-23 20:27:39 +08:00
|
|
|
ss << "#" << _fragment;
|
|
|
|
}
|
|
|
|
return ss.str();
|
|
|
|
}
|
|
|
|
|
2021-12-25 10:04:45 +08:00
|
|
|
} // namespace network
|
2019-11-23 20:27:39 +08:00
|
|
|
|
2022-07-11 17:50:21 +08:00
|
|
|
NS_AX_END
|