1908 lines
51 KiB
C
Raw Normal View History

2018-11-29 20:27:00 +01:00
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
2023-07-02 19:51:09 +02:00
* Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
2018-11-29 20:27:00 +01:00
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
2021-09-14 00:13:48 +02:00
* are also available at https://curl.se/docs/copyright.html.
2018-11-29 20:27:00 +01:00
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
2022-11-16 20:14:03 +01:00
* SPDX-License-Identifier: curl
*
2018-11-29 20:27:00 +01:00
***************************************************************************/
#include "curl_setup.h"
#include "urldata.h"
#include "urlapi-int.h"
#include "strcase.h"
#include "url.h"
#include "escape.h"
#include "curl_ctype.h"
2020-08-30 11:54:41 +02:00
#include "inet_pton.h"
2022-03-29 21:10:50 +02:00
#include "inet_ntop.h"
2022-11-16 20:14:03 +01:00
#include "strdup.h"
2023-07-02 19:51:09 +02:00
#include "idn.h"
#include "curl_memrchr.h"
2018-11-29 20:27:00 +01:00
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
#include "curl_memory.h"
#include "memdebug.h"
/* MSDOS/Windows style drive prefix, eg c: in c:foo. The argument is fully
   parenthesized so that pointer expressions such as 'p + 1' expand correctly.
   Note that the argument is evaluated several times, so it must not have
   side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
/* A drive prefix as it may appear inside a URL: one ASCII letter, then ':' or
 * '|', then a slash, a backslash or the end of the string. The argument is
 * evaluated more than once. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str)                 \
  (((((str)[0] >= 'a') && ((str)[0] <= 'z')) ||           \
    (((str)[0] >= 'A') && ((str)[0] <= 'Z'))) &&          \
   (((str)[1] == ':') || ((str)[1] == '|')) &&            \
   (((str)[2] == '/') || ((str)[2] == '\\') || !(str)[2]))
2022-03-29 21:10:50 +02:00
/* Upper bound on the length of a scheme name. The scheme is not URL encoded,
   so this only has to be at least as long as the longest scheme libcurl
   supports. It caps the scheme scan and sizes the scheme buffer (plus one
   byte for the null terminator). */
#define MAX_SCHEME_LEN 40
2023-07-02 19:51:09 +02:00
/*
 * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
 * sure we have _some_ value for AF_INET6 without polluting our fake value
 * everywhere. The stand-in is only defined when nothing else has already
 * provided AF_INET6.
 */
#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
2018-11-29 20:27:00 +01:00
/* Internal representation of CURLU. Point to URL-encoded strings.
   Every string member is individually heap allocated (or NULL) and is
   released by free_urlhandle(). */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;    /* port number as a string */
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version */
};
/* Scheme name used as the default. NOTE(review): presumably applied when a
   URL comes without a scheme and CURLU_DEFAULT_SCHEME is set - confirm at the
   place where it is used. */
#define DEFAULT_SCHEME "https"
/*
 * Release every string owned by the URL handle. The struct itself is not
 * freed and the member pointers are not cleared, so the handle must not be
 * used again unless the members are overwritten first.
 */
static void free_urlhandle(struct Curl_URL *u)
{
  free(u->scheme);
  free(u->user);
  free(u->password);
  free(u->options);
  free(u->host);
  free(u->zoneid);
  free(u->port);
  free(u->path);
  free(u->query);
  free(u->fragment);
}
/*
 * Return a pointer to the character that ends the host name part of 'url':
 * the first '/' or '?' found after the leading "//" (or after the start of
 * the string when there is no "//"). The '?' covers URLs written like
 * http://www.url.com?id=2380. When neither is present, the returned pointer
 * is the terminating null byte.
 */
static const char *find_host_sep(const char *url)
{
  /* the host name begins right after a "//", when there is one */
  const char *slashes = strstr(url, "//");
  const char *host = slashes ? slashes + 2 : url;

  /* strcspn() stops on whichever of '/' and '?' comes first, or at the end
     of the string */
  return host + strcspn(host, "/?");
}
/*
 * Decide whether a character in a URL must be escaped: true for every byte
 * that is neither a control character, a whitespace character nor a graphic
 * character. The argument is evaluated more than once.
 */
#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
2018-11-29 20:27:00 +01:00
2023-07-02 19:51:09 +02:00
static const char hexdigits[] = "0123456789abcdef";
2022-11-16 20:14:03 +01:00
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
* spaces in the source URL accordingly.
*
2018-11-29 20:27:00 +01:00
* URL encoding should be skipped for host names, otherwise IDN resolution
* will fail.
*/
2022-11-16 20:14:03 +01:00
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
size_t len, bool relative,
bool query)
2018-11-29 20:27:00 +01:00
{
/* we must add this with whitespace-replacing */
2022-11-16 20:14:03 +01:00
bool left = !query;
2018-11-29 20:27:00 +01:00
const unsigned char *iptr;
const unsigned char *host_sep = (const unsigned char *) url;
if(!relative)
host_sep = (const unsigned char *) find_host_sep(url);
for(iptr = (unsigned char *)url; /* read from here */
2022-11-16 20:14:03 +01:00
len; iptr++, len--) {
2018-11-29 20:27:00 +01:00
if(iptr < host_sep) {
2022-11-16 20:14:03 +01:00
if(Curl_dyn_addn(o, iptr, 1))
return CURLUE_OUT_OF_MEMORY;
2018-11-29 20:27:00 +01:00
continue;
}
2022-03-29 21:10:50 +02:00
if(*iptr == ' ') {
2018-11-29 20:27:00 +01:00
if(left) {
2022-11-16 20:14:03 +01:00
if(Curl_dyn_addn(o, "%20", 3))
return CURLUE_OUT_OF_MEMORY;
}
else {
if(Curl_dyn_addn(o, "+", 1))
return CURLUE_OUT_OF_MEMORY;
2018-11-29 20:27:00 +01:00
}
2022-03-29 21:10:50 +02:00
continue;
2018-11-29 20:27:00 +01:00
}
2022-03-29 21:10:50 +02:00
if(*iptr == '?')
left = FALSE;
if(urlchar_needs_escaping(*iptr)) {
2023-07-02 19:51:09 +02:00
char out[3]={'%'};
out[1] = hexdigits[*iptr>>4];
out[2] = hexdigits[*iptr & 0xf];
if(Curl_dyn_addn(o, out, 3))
2022-11-16 20:14:03 +01:00
return CURLUE_OUT_OF_MEMORY;
}
else {
if(Curl_dyn_addn(o, iptr, 1))
return CURLUE_OUT_OF_MEMORY;
2022-03-29 21:10:50 +02:00
}
2018-11-29 20:27:00 +01:00
}
2022-11-16 20:14:03 +01:00
return CURLUE_OK;
2018-11-29 20:27:00 +01:00
}
/*
2022-11-16 20:14:03 +01:00
* Returns the length of the scheme if the given URL is absolute (as opposed
* to relative). Stores the scheme in the buffer if TRUE and 'buf' is
* non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
*
* If 'guess_scheme' is TRUE, it means the URL might be provided without
* scheme.
2018-11-29 20:27:00 +01:00
*/
2022-11-16 20:14:03 +01:00
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
bool guess_scheme)
2018-11-29 20:27:00 +01:00
{
2022-08-04 22:12:04 +02:00
int i;
2022-03-29 21:10:50 +02:00
DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
(void)buflen; /* only used in debug-builds */
if(buf)
buf[0] = 0; /* always leave a defined value in buf */
2018-11-29 20:27:00 +01:00
#ifdef WIN32
2022-11-16 20:14:03 +01:00
if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
return 0;
2018-11-29 20:27:00 +01:00
#endif
2022-03-29 21:10:50 +02:00
for(i = 0; i < MAX_SCHEME_LEN; ++i) {
2018-11-29 20:27:00 +01:00
char s = url[i];
2022-03-29 21:10:50 +02:00
if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
/* RFC 3986 3.1 explains:
scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
*/
2018-11-29 20:27:00 +01:00
}
2022-03-29 21:10:50 +02:00
else {
2018-11-29 20:27:00 +01:00
break;
2022-03-29 21:10:50 +02:00
}
}
2022-11-16 20:14:03 +01:00
if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
/* If this does not guess scheme, the scheme always ends with the colon so
that this also detects data: URLs etc. In guessing mode, data: could
be the host name "data" with a specified port number. */
/* the length of the scheme is the name part only */
size_t len = i;
2022-03-29 21:10:50 +02:00
if(buf) {
buf[i] = 0;
while(i--) {
2022-11-16 20:14:03 +01:00
buf[i] = Curl_raw_tolower(url[i]);
2022-03-29 21:10:50 +02:00
}
}
2022-11-16 20:14:03 +01:00
return len;
2018-11-29 20:27:00 +01:00
}
2022-11-16 20:14:03 +01:00
return 0;
2018-11-29 20:27:00 +01:00
}
/*
* Concatenate a relative URL to a base URL making it absolute.
* URL-encodes any spaces.
* The returned pointer must be freed by the caller unless NULL
* (returns NULL on out of memory).
2022-11-16 20:14:03 +01:00
*
* Note that this function destroys the 'base' string.
2018-11-29 20:27:00 +01:00
*/
2022-11-16 20:14:03 +01:00
static char *concat_url(char *base, const char *relurl)
2018-11-29 20:27:00 +01:00
{
/***
TRY to append this new path to the old URL
to the right of the host part. Oh crap, this is doomed to cause
problems in the future...
*/
2022-11-16 20:14:03 +01:00
struct dynbuf newest;
2018-11-29 20:27:00 +01:00
char *protsep;
char *pathsep;
bool host_changed = FALSE;
const char *useurl = relurl;
/* protsep points to the start of the host name */
2022-11-16 20:14:03 +01:00
protsep = strstr(base, "//");
2018-11-29 20:27:00 +01:00
if(!protsep)
2022-11-16 20:14:03 +01:00
protsep = base;
2018-11-29 20:27:00 +01:00
else
protsep += 2; /* pass the slashes */
if('/' != relurl[0]) {
int level = 0;
/* First we need to find out if there's a ?-letter in the URL,
and cut it and the right-side of that off */
pathsep = strchr(protsep, '?');
if(pathsep)
*pathsep = 0;
/* we have a relative path to append to the last slash if there's one
available, or if the new URL is just a query string (starts with a
'?') we append the new one at the end of the entire currently worked
out URL */
if(useurl[0] != '?') {
pathsep = strrchr(protsep, '/');
if(pathsep)
*pathsep = 0;
}
/* Check if there's any slash after the host name, and if so, remember
that position instead */
pathsep = strchr(protsep, '/');
if(pathsep)
protsep = pathsep + 1;
else
protsep = NULL;
/* now deal with one "./" or any amount of "../" in the newurl
and act accordingly */
if((useurl[0] == '.') && (useurl[1] == '/'))
useurl += 2; /* just skip the "./" */
while((useurl[0] == '.') &&
(useurl[1] == '.') &&
(useurl[2] == '/')) {
level++;
useurl += 3; /* pass the "../" */
}
if(protsep) {
while(level--) {
/* cut off one more level from the right of the original URL */
pathsep = strrchr(protsep, '/');
if(pathsep)
*pathsep = 0;
else {
*protsep = 0;
break;
}
}
}
}
else {
/* We got a new absolute path for this server */
2020-08-30 11:54:41 +02:00
if(relurl[1] == '/') {
2018-11-29 20:27:00 +01:00
/* the new URL starts with //, just keep the protocol part from the
original one */
*protsep = 0;
useurl = &relurl[2]; /* we keep the slashes from the original, so we
skip the new ones */
host_changed = TRUE;
}
else {
/* cut off the original URL from the first slash, or deal with URLs
without slash */
pathsep = strchr(protsep, '/');
if(pathsep) {
/* When people use badly formatted URLs, such as
"http://www.url.com?dir=/home/daniel" we must not use the first
slash, if there's a ?-letter before it! */
char *sep = strchr(protsep, '?');
if(sep && (sep < pathsep))
pathsep = sep;
*pathsep = 0;
}
else {
/* There was no slash. Now, since we might be operating on a badly
formatted URL, such as "http://www.url.com?id=2380" which doesn't
use a slash separator as it is supposed to, we need to check for a
?-letter as well! */
pathsep = strchr(protsep, '?');
if(pathsep)
*pathsep = 0;
}
}
}
2022-11-16 20:14:03 +01:00
Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
2018-11-29 20:27:00 +01:00
/* copy over the root url part */
2022-11-16 20:14:03 +01:00
if(Curl_dyn_add(&newest, base))
return NULL;
2018-11-29 20:27:00 +01:00
/* check if we need to append a slash */
if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
;
2022-11-16 20:14:03 +01:00
else {
if(Curl_dyn_addn(&newest, "/", 1))
return NULL;
}
2018-11-29 20:27:00 +01:00
/* then append the new piece on the right side */
2022-11-16 20:14:03 +01:00
urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
2018-11-29 20:27:00 +01:00
2022-11-16 20:14:03 +01:00
return Curl_dyn_ptr(&newest);
2018-11-29 20:27:00 +01:00
}
2023-07-02 19:51:09 +02:00
/* scan for byte values <= 31, 127 and sometimes space */
static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
2022-03-29 21:10:50 +02:00
{
2023-07-02 19:51:09 +02:00
static const char badbytes[]={
/* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x7f, 0x00 /* null-terminate */
};
size_t n = strlen(url);
size_t nfine;
if(n > CURL_MAX_INPUT_LENGTH)
/* excessive input length */
return CURLUE_MALFORMED_INPUT;
nfine = strcspn(url, badbytes);
if((nfine != n) ||
(!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
return CURLUE_MALFORMED_INPUT;
*urllen = n;
return CURLUE_OK;
2022-03-29 21:10:50 +02:00
}
2018-11-29 20:27:00 +01:00
/*
* parse_hostname_login()
*
* Parse the login details (user name, password and options) from the URL and
* strip them out of the host name
*
*/
static CURLUcode parse_hostname_login(struct Curl_URL *u,
2023-07-02 19:51:09 +02:00
const char *login,
size_t len,
unsigned int flags,
size_t *offset) /* to the host name */
2018-11-29 20:27:00 +01:00
{
CURLUcode result = CURLUE_OK;
CURLcode ccode;
char *userp = NULL;
char *passwdp = NULL;
char *optionsp = NULL;
2020-08-30 11:54:41 +02:00
const struct Curl_handler *h = NULL;
2018-11-29 20:27:00 +01:00
2022-11-16 20:14:03 +01:00
/* At this point, we assume all the other special cases have been taken
* care of, so the host is at most
*
* [user[:password][;options]]@]hostname
2018-11-29 20:27:00 +01:00
*
* We need somewhere to put the embedded details, so do that first.
*/
2022-11-16 20:14:03 +01:00
char *ptr;
2018-11-29 20:27:00 +01:00
2022-11-16 20:14:03 +01:00
DEBUGASSERT(login);
2023-07-02 19:51:09 +02:00
*offset = 0;
ptr = memchr(login, '@', len);
2018-11-29 20:27:00 +01:00
if(!ptr)
goto out;
/* We will now try to extract the
* possible login information in a string like:
* ftp://user:password@ftp.my.site:8021/README */
2022-11-16 20:14:03 +01:00
ptr++;
2018-11-29 20:27:00 +01:00
2020-08-30 11:54:41 +02:00
/* if this is a known scheme, get some details */
if(u->scheme)
2022-11-16 20:14:03 +01:00
h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
2020-08-30 11:54:41 +02:00
2018-11-29 20:27:00 +01:00
/* We could use the login information in the URL so extract it. Only parse
options if the handler says we should. Note that 'h' might be NULL! */
ccode = Curl_parse_login_details(login, ptr - login - 1,
&userp, &passwdp,
(h && (h->flags & PROTOPT_URLOPTIONS)) ?
&optionsp:NULL);
if(ccode) {
2022-03-29 21:10:50 +02:00
result = CURLUE_BAD_LOGIN;
2018-11-29 20:27:00 +01:00
goto out;
}
if(userp) {
if(flags & CURLU_DISALLOW_USER) {
/* Option DISALLOW_USER is set and url contains username. */
result = CURLUE_USER_NOT_ALLOWED;
goto out;
}
2023-07-02 19:51:09 +02:00
free(u->user);
2018-11-29 20:27:00 +01:00
u->user = userp;
}
2022-03-29 21:10:50 +02:00
if(passwdp) {
2023-07-02 19:51:09 +02:00
free(u->password);
2018-11-29 20:27:00 +01:00
u->password = passwdp;
2022-03-29 21:10:50 +02:00
}
2018-11-29 20:27:00 +01:00
2022-03-29 21:10:50 +02:00
if(optionsp) {
2023-07-02 19:51:09 +02:00
free(u->options);
2018-11-29 20:27:00 +01:00
u->options = optionsp;
2022-03-29 21:10:50 +02:00
}
2018-11-29 20:27:00 +01:00
2023-07-02 19:51:09 +02:00
/* the host name starts at this offset */
*offset = ptr - login;
2018-11-29 20:27:00 +01:00
return CURLUE_OK;
2023-07-02 19:51:09 +02:00
out:
2018-11-29 20:27:00 +01:00
free(userp);
free(passwdp);
free(optionsp);
2022-03-29 21:10:50 +02:00
u->user = NULL;
u->password = NULL;
u->options = NULL;
2018-11-29 20:27:00 +01:00
return result;
}
2022-11-16 20:14:03 +01:00
/*
 * Find, validate and strip a ":port" suffix from the host name held in
 * 'host'.
 *
 * For a host name starting with '[', the port is only looked for after the
 * closing ']'. When a colon is found, 'host' is cut at the colon. A decimal
 * port number (at most 65535) is stored in u->portnum and, regenerated
 * without leading zeroes, in u->port. A colon with nothing after it is
 * accepted only when 'has_scheme' is TRUE.
 *
 * Returns CURLUE_OK, CURLUE_BAD_IPV6 (missing ']'), CURLUE_BAD_PORT_NUMBER or
 * CURLUE_OUT_OF_MEMORY.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                   bool has_scheme)
{
  char *portptr;
  char *hostname = Curl_dyn_ptr(host);
  /*
   * Find the end of an IPv6 address on the ']' ending bracket.
   */
  if(hostname[0] == '[') {
    portptr = strchr(hostname, ']');
    if(!portptr)
      return CURLUE_BAD_IPV6;
    portptr++;
    /* this is a RFC2732-style specified IP-address */
    if(*portptr) {
      if(*portptr != ':')
        return CURLUE_BAD_PORT_NUMBER;
    }
    else
      portptr = NULL;
  }
  else
    portptr = strchr(hostname, ':');

  if(portptr) {
    char *rest;
    long port;
    size_t keep = portptr - hostname;

    /* Browser behavior adaptation. If there's a colon with no digits after,
       just cut off the name there which makes us ignore the colon and just
       use the default port. Firefox, Chrome and Safari all do that.

       Don't do it if the URL has no scheme, to make something that looks like
       a scheme not work!
    */
    Curl_dyn_setlen(host, keep);
    portptr++;
    if(!*portptr)
      return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;

    if(!ISDIGIT(*portptr))
      return CURLUE_BAD_PORT_NUMBER;

    port = strtol(portptr, &rest, 10);  /* Port number must be decimal */

    if(port > 0xffff)
      return CURLUE_BAD_PORT_NUMBER;

    if(rest[0])
      /* trailing non-digit data after the number */
      return CURLUE_BAD_PORT_NUMBER;

    u->portnum = port;
    /* generate a new port number string to get rid of leading zeroes etc */
    free(u->port);
    u->port = aprintf("%ld", port);
    if(!u->port)
      return CURLUE_OUT_OF_MEMORY;
  }

  return CURLUE_OK;
}
2023-07-02 19:51:09 +02:00
/* this assumes 'hostname' now starts with [ */
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
size_t hlen) /* length of hostname */
2018-11-29 20:27:00 +01:00
{
size_t len;
2023-07-02 19:51:09 +02:00
DEBUGASSERT(*hostname == '[');
if(hlen < 4) /* '[::]' is the shortest possible valid string */
return CURLUE_BAD_IPV6;
hostname++;
hlen -= 2;
/* only valid IPv6 letters are ok */
len = strspn(hostname, "0123456789abcdefABCDEF:.");
if(hlen != len) {
hlen = len;
if(hostname[len] == '%') {
/* this could now be '%[zone id]' */
char zoneid[16];
int i = 0;
char *h = &hostname[len + 1];
/* pass '25' if present and is a url encoded percent sign */
if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
h += 2;
while(*h && (*h != ']') && (i < 15))
zoneid[i++] = *h++;
if(!i || (']' != *h))
return CURLUE_BAD_IPV6;
zoneid[i] = 0;
u->zoneid = strdup(zoneid);
if(!u->zoneid)
return CURLUE_OUT_OF_MEMORY;
hostname[len] = ']'; /* insert end bracket */
hostname[len + 1] = 0; /* terminate the hostname */
}
else
2022-03-29 21:10:50 +02:00
return CURLUE_BAD_IPV6;
2023-07-02 19:51:09 +02:00
/* hostname is fine */
}
2018-11-29 20:27:00 +01:00
2023-07-02 19:51:09 +02:00
/* Check the IPv6 address. */
{
char dest[16]; /* fits a binary IPv6 address */
char norm[MAX_IPADR_LEN];
hostname[hlen] = 0; /* end the address there */
if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
2022-03-29 21:10:50 +02:00
return CURLUE_BAD_IPV6;
2020-08-30 11:54:41 +02:00
2023-07-02 19:51:09 +02:00
/* check if it can be done shorter */
if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
(strlen(norm) < hlen)) {
strcpy(hostname, norm);
hlen = strlen(norm);
hostname[hlen + 1] = 0;
2022-03-29 21:10:50 +02:00
}
2023-07-02 19:51:09 +02:00
hostname[hlen] = ']'; /* restore ending bracket */
2018-11-29 20:27:00 +01:00
}
2023-07-02 19:51:09 +02:00
return CURLUE_OK;
}
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
size_t hlen) /* length of hostname */
{
size_t len;
DEBUGASSERT(hostname);
if(!hlen)
return CURLUE_NO_HOST;
else if(hostname[0] == '[')
return ipv6_parse(u, hostname, hlen);
2018-11-29 20:27:00 +01:00
else {
2022-08-04 22:12:04 +02:00
/* letters from the second string are not ok */
2023-07-02 19:51:09 +02:00
len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
2018-11-29 20:27:00 +01:00
if(hlen != len)
/* hostname with bad content */
2022-03-29 21:10:50 +02:00
return CURLUE_BAD_HOSTNAME;
2018-11-29 20:27:00 +01:00
}
return CURLUE_OK;
}
2021-09-14 00:13:48 +02:00
/*
* Handle partial IPv4 numerical addresses and different bases, like
* '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
*
2023-07-02 19:51:09 +02:00
* If the given input string is syntactically wrong IPv4 or any part for
* example is too big, this function returns HOST_NAME.
2021-09-14 00:13:48 +02:00
*
* Output the "normalized" version of that input string in plain quad decimal
2023-07-02 19:51:09 +02:00
* integers.
*
* Returns the host type.
2021-09-14 00:13:48 +02:00
*/
2023-07-02 19:51:09 +02:00
#define HOST_ERROR -1 /* out of memory */
#define HOST_BAD -2 /* bad IPv4 address */
#define HOST_NAME 1
#define HOST_IPV4 2
#define HOST_IPV6 3
static int ipv4_normalize(struct dynbuf *host)
2021-09-14 00:13:48 +02:00
{
bool done = FALSE;
int n = 0;
2023-07-02 19:51:09 +02:00
const char *c = Curl_dyn_ptr(host);
2021-09-14 00:13:48 +02:00
unsigned long parts[4] = {0, 0, 0, 0};
2023-07-02 19:51:09 +02:00
CURLcode result = CURLE_OK;
if(*c == '[')
return HOST_IPV6;
2021-09-14 00:13:48 +02:00
while(!done) {
char *endp;
unsigned long l;
2023-07-02 19:51:09 +02:00
if(!ISDIGIT(*c))
2021-09-14 00:13:48 +02:00
/* most importantly this doesn't allow a leading plus or minus */
2023-07-02 19:51:09 +02:00
return HOST_NAME;
2021-09-14 00:13:48 +02:00
l = strtoul(c, &endp, 0);
parts[n] = l;
c = endp;
2023-07-02 19:51:09 +02:00
switch(*c) {
case '.':
2021-09-14 00:13:48 +02:00
if(n == 3)
2023-07-02 19:51:09 +02:00
return HOST_NAME;
2021-09-14 00:13:48 +02:00
n++;
c++;
break;
case '\0':
done = TRUE;
break;
default:
2023-07-02 19:51:09 +02:00
return HOST_NAME;
2021-09-14 00:13:48 +02:00
}
2023-07-02 19:51:09 +02:00
/* overflow */
if((l == ULONG_MAX) && (errno == ERANGE))
return HOST_NAME;
#if SIZEOF_LONG > 4
/* a value larger than 32 bits */
if(l > UINT_MAX)
return HOST_NAME;
#endif
}
2021-09-14 00:13:48 +02:00
switch(n) {
case 0: /* a -- 32 bits */
2023-07-02 19:51:09 +02:00
Curl_dyn_reset(host);
result = Curl_dyn_addf(host, "%u.%u.%u.%u",
parts[0] >> 24, (parts[0] >> 16) & 0xff,
(parts[0] >> 8) & 0xff, parts[0] & 0xff);
2021-09-14 00:13:48 +02:00
break;
case 1: /* a.b -- 8.24 bits */
if((parts[0] > 0xff) || (parts[1] > 0xffffff))
2023-07-02 19:51:09 +02:00
return HOST_NAME;
Curl_dyn_reset(host);
result = Curl_dyn_addf(host, "%u.%u.%u.%u",
parts[0], (parts[1] >> 16) & 0xff,
(parts[1] >> 8) & 0xff, parts[1] & 0xff);
2021-09-14 00:13:48 +02:00
break;
case 2: /* a.b.c -- 8.8.16 bits */
if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
2023-07-02 19:51:09 +02:00
return HOST_NAME;
Curl_dyn_reset(host);
result = Curl_dyn_addf(host, "%u.%u.%u.%u",
parts[0], parts[1], (parts[2] >> 8) & 0xff,
parts[2] & 0xff);
2021-09-14 00:13:48 +02:00
break;
case 3: /* a.b.c.d -- 8.8.8.8 bits */
if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
(parts[3] > 0xff))
2023-07-02 19:51:09 +02:00
return HOST_NAME;
Curl_dyn_reset(host);
result = Curl_dyn_addf(host, "%u.%u.%u.%u",
parts[0], parts[1], parts[2], parts[3]);
2021-09-14 00:13:48 +02:00
break;
}
2023-07-02 19:51:09 +02:00
if(result)
return HOST_ERROR;
return HOST_IPV4;
2021-09-14 00:13:48 +02:00
}
2022-11-16 20:14:03 +01:00
/* if necessary, replace the host content with a URL decoded version */
2023-07-02 19:51:09 +02:00
static CURLUcode urldecode_host(struct dynbuf *host)
2022-03-29 21:10:50 +02:00
{
char *per = NULL;
2022-11-16 20:14:03 +01:00
const char *hostname = Curl_dyn_ptr(host);
per = strchr(hostname, '%');
if(!per)
/* nothing to decode */
return CURLUE_OK;
2022-03-29 21:10:50 +02:00
else {
2022-11-16 20:14:03 +01:00
/* encoded */
2022-03-29 21:10:50 +02:00
size_t dlen;
2022-11-16 20:14:03 +01:00
char *decoded;
CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
REJECT_CTRL);
2022-03-29 21:10:50 +02:00
if(result)
return CURLUE_BAD_HOSTNAME;
2022-11-16 20:14:03 +01:00
Curl_dyn_reset(host);
result = Curl_dyn_addn(host, decoded, dlen);
free(decoded);
if(result)
return CURLUE_OUT_OF_MEMORY;
2022-03-29 21:10:50 +02:00
}
return CURLUE_OK;
}
2023-07-02 19:51:09 +02:00
/*
 * Split the 'authlen' bytes of authority data at 'auth' into login details,
 * host name and port. The login details and the port end up in 'u', while
 * the checked (and possibly normalized) host name is left in 'host'.
 * 'has_scheme' is passed on to the port parser.
 */
static CURLUcode parse_authority(struct Curl_URL *u,
                                 const char *auth, size_t authlen,
                                 unsigned int flags,
                                 struct dynbuf *host,
                                 bool has_scheme)
{
  size_t hostoff; /* where the host name begins within 'auth' */
  CURLUcode uc;

  /* take out user, password and options first */
  uc = parse_hostname_login(u, auth, authlen, flags, &hostoff);
  if(uc)
    return uc;

  /* what follows the login part is the host name and maybe a port */
  if(Curl_dyn_addn(host, auth + hostoff, authlen - hostoff))
    return CURLUE_OUT_OF_MEMORY;

  uc = Curl_parse_port(u, host, has_scheme);
  if(uc)
    return uc;

  if(!Curl_dyn_len(host))
    return CURLUE_NO_HOST;

  /* 'uc' is CURLUE_OK here; the host type decides the remaining checks */
  switch(ipv4_normalize(host)) {
  case HOST_IPV4:
    /* already rewritten as a dotted quad, nothing more to verify */
    break;

  case HOST_IPV6:
    uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
    break;

  case HOST_NAME:
    uc = urldecode_host(host);
    if(uc == CURLUE_OK)
      uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
    break;

  case HOST_ERROR:
    uc = CURLUE_OUT_OF_MEMORY;
    break;

  case HOST_BAD:
  default:
    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
    break;
  }

  return uc;
}
/*
 * Parse the null-terminated 'authority' string into the handle. On success
 * the handle's previous host is freed and replaced with the newly parsed
 * one. On failure the temporary host buffer is freed and the error from the
 * parsing is returned.
 */
CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
                                 unsigned int flags)
{
  struct dynbuf hostbuf;
  CURLUcode uc;
  bool has_scheme;

  DEBUGASSERT(authority);
  Curl_dyn_init(&hostbuf, CURL_MAX_INPUT_LENGTH);
  has_scheme = !!u->scheme;

  uc = parse_authority(u, authority, strlen(authority), flags,
                       &hostbuf, has_scheme);
  if(uc) {
    Curl_dyn_free(&hostbuf);
    return uc;
  }

  /* the handle takes over the memory held by the dynbuf */
  free(u->host);
  u->host = Curl_dyn_ptr(&hostbuf);
  return uc;
}
2022-11-16 20:14:03 +01:00
/*
* "Remove Dot Segments"
* https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
*/
/*
* dedotdotify()
* @unittest: 1395
*
* This function gets a null-terminated path with dot and dotdot sequences
* passed in and strips them off according to the rules in RFC 3986 section
* 5.2.4.
*
* The function handles a query part ('?' + stuff) appended but it expects
* that fragments ('#' + stuff) have already been cut off.
*
* RETURNS
*
2023-07-02 19:51:09 +02:00
* Zero for success and 'out' set to an allocated dedotdotified string.
2022-11-16 20:14:03 +01:00
*/
2023-07-02 19:51:09 +02:00
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
2018-11-29 20:27:00 +01:00
{
2022-11-16 20:14:03 +01:00
char *outptr;
2023-07-02 19:51:09 +02:00
const char *endp = &input[clen];
char *out;
*outp = NULL;
/* the path always starts with a slash, and a slash has not dot */
if((clen < 2) || !memchr(input, '.', clen))
return 0;
out = malloc(clen + 1);
2022-11-16 20:14:03 +01:00
if(!out)
2023-07-02 19:51:09 +02:00
return 1; /* out of memory */
2022-11-16 20:14:03 +01:00
*out = 0; /* null-terminates, for inputs like "./" */
outptr = out;
do {
bool dotdot = TRUE;
if(*input == '.') {
/* A. If the input buffer begins with a prefix of "../" or "./", then
remove that prefix from the input buffer; otherwise, */
if(!strncmp("./", input, 2)) {
input += 2;
clen -= 2;
}
else if(!strncmp("../", input, 3)) {
input += 3;
clen -= 3;
}
/* D. if the input buffer consists only of "." or "..", then remove
that from the input buffer; otherwise, */
else if(!strcmp(".", input) || !strcmp("..", input) ||
!strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
*out = 0;
break;
}
else
dotdot = FALSE;
}
else if(*input == '/') {
/* B. if the input buffer begins with a prefix of "/./" or "/.", where
"." is a complete path segment, then replace that prefix with "/" in
the input buffer; otherwise, */
if(!strncmp("/./", input, 3)) {
input += 2;
clen -= 2;
}
else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
*outptr++ = '/';
*outptr = 0;
break;
}
/* C. if the input buffer begins with a prefix of "/../" or "/..",
where ".." is a complete path segment, then replace that prefix with
"/" in the input buffer and remove the last segment and its
preceding "/" (if any) from the output buffer; otherwise, */
else if(!strncmp("/../", input, 4)) {
input += 3;
clen -= 3;
/* remove the last segment from the output buffer */
while(outptr > out) {
outptr--;
if(*outptr == '/')
break;
}
*outptr = 0; /* null-terminate where it stops */
}
else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
/* remove the last segment from the output buffer */
while(outptr > out) {
outptr--;
if(*outptr == '/')
break;
}
*outptr++ = '/';
*outptr = 0; /* null-terminate where it stops */
break;
}
else
dotdot = FALSE;
}
else
dotdot = FALSE;
if(!dotdot) {
/* E. move the first path segment in the input buffer to the end of
the output buffer, including the initial "/" character (if any) and
any subsequent characters up to, but not including, the next "/"
character or the end of the input buffer. */
do {
*outptr++ = *input++;
clen--;
} while(*input && (*input != '/') && (*input != '?'));
*outptr = 0;
}
2023-07-02 19:51:09 +02:00
/* continue until end of path */
} while(input < endp);
2022-11-16 20:14:03 +01:00
2023-07-02 19:51:09 +02:00
*outp = out;
return 0; /* success */
2022-11-16 20:14:03 +01:00
}
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
const char *path;
size_t pathlen;
2018-11-29 20:27:00 +01:00
char *query = NULL;
char *fragment = NULL;
2019-11-11 23:01:05 +01:00
char schemebuf[MAX_SCHEME_LEN + 1];
2018-11-29 20:27:00 +01:00
size_t schemelen = 0;
size_t urllen;
2022-11-16 20:14:03 +01:00
CURLUcode result = CURLUE_OK;
size_t fraglen = 0;
struct dynbuf host;
2018-11-29 20:27:00 +01:00
2021-11-20 13:41:27 +01:00
DEBUGASSERT(url);
2018-11-29 20:27:00 +01:00
2022-11-16 20:14:03 +01:00
Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
2023-07-02 19:51:09 +02:00
result = junkscan(url, &urllen, flags);
if(result)
2022-11-16 20:14:03 +01:00
goto fail;
2018-11-29 20:27:00 +01:00
2022-11-16 20:14:03 +01:00
schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
flags & (CURLU_GUESS_SCHEME|
CURLU_DEFAULT_SCHEME));
2018-11-29 20:27:00 +01:00
/* handle the file: scheme */
2022-11-16 20:14:03 +01:00
if(schemelen && !strcmp(schemebuf, "file")) {
2023-07-02 19:51:09 +02:00
bool uncpath = FALSE;
2022-11-16 20:14:03 +01:00
if(urllen <= 6) {
2022-03-29 21:10:50 +02:00
/* file:/ is not enough to actually be a complete file: URL */
2022-11-16 20:14:03 +01:00
result = CURLUE_BAD_FILE_URL;
goto fail;
}
2022-03-29 21:10:50 +02:00
2018-11-29 20:27:00 +01:00
/* path has been allocated large enough to hold this */
2022-11-16 20:14:03 +01:00
path = (char *)&url[5];
2023-07-02 19:51:09 +02:00
pathlen = urllen - 5;
2018-11-29 20:27:00 +01:00
2023-07-02 19:51:09 +02:00
u->scheme = strdup("file");
2022-11-16 20:14:03 +01:00
if(!u->scheme) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
2018-11-29 20:27:00 +01:00
/* Extra handling URLs with an authority component (i.e. that start with
* "file://")
*
* We allow omitted hostname (e.g. file:/<path>) -- valid according to
* RFC 8089, but not the (current) WHAT-WG URL spec.
*/
if(path[0] == '/' && path[1] == '/') {
/* swallow the two slashes */
2022-11-16 20:14:03 +01:00
const char *ptr = &path[2];
2018-11-29 20:27:00 +01:00
/*
* According to RFC 8089, a file: URL can be reliably dereferenced if:
*
* o it has no/blank hostname, or
*
* o the hostname matches "localhost" (case-insensitively), or
*
2022-03-29 21:10:50 +02:00
* o the hostname is a FQDN that resolves to this machine, or
*
* o it is an UNC String transformed to an URI (Windows only, RFC 8089
* Appendix E.3).
2018-11-29 20:27:00 +01:00
*
* For brevity, we only consider URLs with empty, "localhost", or
2022-03-29 21:10:50 +02:00
* "127.0.0.1" hostnames as local, otherwise as an UNC String.
2018-11-29 20:27:00 +01:00
*
* Additionally, there is an exception for URLs with a Windows drive
* letter in the authority (which was accidentally omitted from RFC 8089
* Appendix E, but believe me, it was meant to be there. --MK)
*/
if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
/* the URL includes a host name, it must match "localhost" or
"127.0.0.1" to be valid */
2022-03-29 21:10:50 +02:00
if(checkprefix("localhost/", ptr) ||
checkprefix("127.0.0.1/", ptr)) {
ptr += 9; /* now points to the slash after the host */
}
else {
#if defined(WIN32)
size_t len;
/* the host name, NetBIOS computer name, can not contain disallowed
chars, and the delimiting slash character must be appended to the
host name */
path = strpbrk(ptr, "/\\:*?\"<>|");
2022-11-16 20:14:03 +01:00
if(!path || *path != '/') {
result = CURLUE_BAD_FILE_URL;
goto fail;
}
2022-03-29 21:10:50 +02:00
len = path - ptr;
if(len) {
2022-11-16 20:14:03 +01:00
if(Curl_dyn_addn(&host, ptr, len)) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
2022-03-29 21:10:50 +02:00
uncpath = TRUE;
}
ptr -= 2; /* now points to the // before the host in UNC */
#else
2018-11-29 20:27:00 +01:00
/* Invalid file://hostname/, expected localhost or 127.0.0.1 or
none */
2022-11-16 20:14:03 +01:00
result = CURLUE_BAD_FILE_URL;
goto fail;
2022-03-29 21:10:50 +02:00
#endif
2018-11-29 20:27:00 +01:00
}
}
path = ptr;
2023-07-02 19:51:09 +02:00
pathlen = urllen - (ptr - url);
2018-11-29 20:27:00 +01:00
}
2022-03-29 21:10:50 +02:00
if(!uncpath)
2022-11-16 20:14:03 +01:00
/* no host for file: URLs by default */
Curl_dyn_reset(&host);
2022-03-29 21:10:50 +02:00
2018-11-29 20:27:00 +01:00
#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
/* Don't allow Windows drive letters when not in Windows.
* This catches both "file:/c:" and "file:c:" */
if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
STARTS_WITH_URL_DRIVE_PREFIX(path)) {
/* File drive letters are only accepted in MSDOS/Windows */
2022-11-16 20:14:03 +01:00
result = CURLUE_BAD_FILE_URL;
goto fail;
2018-11-29 20:27:00 +01:00
}
#else
/* If the path starts with a slash and a drive letter, ditch the slash */
if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
/* This cannot be done with strcpy, as the memory chunks overlap! */
2022-11-16 20:14:03 +01:00
path++;
2018-11-29 20:27:00 +01:00
}
#endif
}
else {
/* clear path */
2023-07-02 19:51:09 +02:00
const char *schemep = NULL;
2018-11-29 20:27:00 +01:00
const char *hostp;
2023-07-02 19:51:09 +02:00
size_t hostlen;
2018-11-29 20:27:00 +01:00
2022-11-16 20:14:03 +01:00
if(schemelen) {
2018-11-29 20:27:00 +01:00
int i = 0;
2023-07-02 19:51:09 +02:00
const char *p = &url[schemelen + 1];
while((*p == '/') && (i < 4)) {
2018-11-29 20:27:00 +01:00
p++;
i++;
}
schemep = schemebuf;
2022-11-16 20:14:03 +01:00
if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
!(flags & CURLU_NON_SUPPORT_SCHEME)) {
result = CURLUE_UNSUPPORTED_SCHEME;
goto fail;
}
2018-11-29 20:27:00 +01:00
2023-07-02 19:51:09 +02:00
if((i < 1) || (i > 3)) {
2022-11-16 20:14:03 +01:00
/* less than one or more than three slashes */
result = CURLUE_BAD_SLASHES;
goto fail;
}
2023-07-02 19:51:09 +02:00
hostp = p; /* host name starts here */
2018-11-29 20:27:00 +01:00
}
else {
/* no scheme! */
2022-11-16 20:14:03 +01:00
if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
result = CURLUE_BAD_SCHEME;
goto fail;
}
2018-11-29 20:27:00 +01:00
if(flags & CURLU_DEFAULT_SCHEME)
2020-08-30 11:54:41 +02:00
schemep = DEFAULT_SCHEME;
2018-11-29 20:27:00 +01:00
/*
* The URL was badly formatted, let's try without scheme specified.
*/
2023-07-02 19:51:09 +02:00
hostp = url;
2018-11-29 20:27:00 +01:00
}
2023-07-02 19:51:09 +02:00
if(schemep) {
u->scheme = strdup(schemep);
if(!u->scheme) {
2022-11-16 20:14:03 +01:00
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
2020-08-30 11:54:41 +02:00
}
2023-07-02 19:51:09 +02:00
/* find the end of the host name + port number */
hostlen = strcspn(hostp, "/?#");
path = &hostp[hostlen];
/* this pathlen also contains the query and the fragment */
pathlen = urllen - (path - url);
if(hostlen) {
result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
if(result)
2022-11-16 20:14:03 +01:00
goto fail;
2018-11-29 20:27:00 +01:00
2023-07-02 19:51:09 +02:00
if((flags & CURLU_GUESS_SCHEME) && !schemep) {
const char *hostname = Curl_dyn_ptr(&host);
/* legacy curl-style guess based on host name */
if(checkprefix("ftp.", hostname))
schemep = "ftp";
else if(checkprefix("dict.", hostname))
schemep = "dict";
else if(checkprefix("ldap.", hostname))
schemep = "ldap";
else if(checkprefix("imap.", hostname))
schemep = "imap";
else if(checkprefix("smtp.", hostname))
schemep = "smtp";
else if(checkprefix("pop3.", hostname))
schemep = "pop3";
else
schemep = "http";
2018-11-29 20:27:00 +01:00
2023-07-02 19:51:09 +02:00
u->scheme = strdup(schemep);
if(!u->scheme) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
}
}
else if(flags & CURLU_NO_AUTHORITY) {
/* allowed to be empty. */
if(Curl_dyn_add(&host, "")) {
2022-11-16 20:14:03 +01:00
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
2020-08-30 11:54:41 +02:00
}
2023-07-02 19:51:09 +02:00
else {
result = CURLUE_NO_HOST;
goto fail;
}
2018-11-29 20:27:00 +01:00
}
2020-08-30 11:54:41 +02:00
fragment = strchr(path, '#');
if(fragment) {
2023-07-02 19:51:09 +02:00
fraglen = pathlen - (fragment - path);
2022-11-16 20:14:03 +01:00
if(fraglen > 1) {
/* skip the leading '#' in the copy but include the terminating null */
2023-07-02 19:51:09 +02:00
if(flags & CURLU_URLENCODE) {
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
if(urlencode_str(&enc, fragment + 1, fraglen, TRUE, FALSE)) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
u->fragment = Curl_dyn_ptr(&enc);
2022-11-16 20:14:03 +01:00
}
2023-07-02 19:51:09 +02:00
else {
u->fragment = Curl_memdup(fragment + 1, fraglen);
if(!u->fragment) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
2022-11-16 20:14:03 +01:00
}
2020-08-30 11:54:41 +02:00
}
2023-07-02 19:51:09 +02:00
/* after this, pathlen still contains the query */
pathlen -= fraglen;
2020-08-30 11:54:41 +02:00
}
2023-07-02 19:51:09 +02:00
DEBUGASSERT(pathlen < urllen);
query = memchr(path, '?', pathlen);
if(query) {
size_t qlen = fragment ? (size_t)(fragment - query) :
pathlen - (query - path);
pathlen -= qlen;
2022-11-16 20:14:03 +01:00
if(qlen > 1) {
2023-07-02 19:51:09 +02:00
if(flags & CURLU_URLENCODE) {
2022-11-16 20:14:03 +01:00
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
/* skip the leading question mark */
if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
u->query = Curl_dyn_ptr(&enc);
}
else {
u->query = Curl_memdup(query + 1, qlen);
if(!u->query) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
u->query[qlen - 1] = 0;
}
}
else {
/* single byte query */
u->query = strdup("");
if(!u->query) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
}
}
if(pathlen && (flags & CURLU_URLENCODE)) {
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
pathlen = Curl_dyn_len(&enc);
path = u->path = Curl_dyn_ptr(&enc);
}
2022-03-29 21:10:50 +02:00
2023-07-02 19:51:09 +02:00
if(pathlen <= 1) {
/* there is no path left or just the slash, unset */
2018-11-29 20:27:00 +01:00
path = NULL;
2022-11-16 20:14:03 +01:00
}
2020-08-30 11:54:41 +02:00
else {
2022-11-16 20:14:03 +01:00
if(!u->path) {
u->path = Curl_memdup(path, pathlen + 1);
if(!u->path) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
u->path[pathlen] = 0;
path = u->path;
}
else if(flags & CURLU_URLENCODE)
/* it might have encoded more than just the path so cut it */
u->path[pathlen] = 0;
2020-08-30 11:54:41 +02:00
if(!(flags & CURLU_PATH_AS_IS)) {
/* remove ../ and ./ sequences according to RFC3986 */
2023-07-02 19:51:09 +02:00
char *dedot;
int err = dedotdotify((char *)path, pathlen, &dedot);
if(err) {
2022-11-16 20:14:03 +01:00
result = CURLUE_OUT_OF_MEMORY;
goto fail;
2020-08-30 11:54:41 +02:00
}
2023-07-02 19:51:09 +02:00
if(dedot) {
free(u->path);
u->path = dedot;
2022-11-16 20:14:03 +01:00
}
2020-08-30 11:54:41 +02:00
}
2018-11-29 20:27:00 +01:00
}
2022-11-16 20:14:03 +01:00
u->host = Curl_dyn_ptr(&host);
2018-11-29 20:27:00 +01:00
2022-11-16 20:14:03 +01:00
return result;
2023-07-02 19:51:09 +02:00
fail:
2022-11-16 20:14:03 +01:00
Curl_dyn_free(&host);
free_urlhandle(u);
2018-11-29 20:27:00 +01:00
return result;
}
2022-08-04 22:12:04 +02:00
/*
 * Parse 'url' into a zeroed scratch handle. Only when parsing succeeds are
 * the parts currently held by 'u' released and the freshly parsed contents
 * copied over it. When parsing fails, 'u' is not touched and the parse error
 * code is handed back to the caller.
 */
static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
                                      unsigned int flags)
{
  CURLU parsed;
  CURLUcode rc;

  memset(&parsed, 0, sizeof(parsed));
  rc = parseurl(url, &parsed, flags);
  if(rc)
    /* keep the existing contents of 'u' */
    return rc;

  /* drop the old parts, then take over the strings owned by 'parsed' */
  free_urlhandle(u);
  *u = parsed;
  return rc;
}
2018-11-29 20:27:00 +01:00
/*
 * curl_url() allocates a new URL handle with every member zeroed.
 *
 * Returns the new handle, or NULL if the allocation fails. A handle is
 * released with curl_url_cleanup().
 */
CURLU *curl_url(void)
{
  /* calloc() takes the element count first and the element size second */
  return calloc(1, sizeof(struct Curl_URL));
}
/*
 * curl_url_cleanup() passes the handle to free_urlhandle() to release what
 * it holds and then frees the handle itself. A NULL handle is ignored.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
2021-09-14 00:13:48 +02:00
/*
 * DUP() copies the string member 'name' of the struct pointed to by 'src'
 * into the same member of the struct pointed to by 'dest', using strdup().
 *
 * A NULL member in 'src' is skipped and the destination member is left
 * untouched. If strdup() fails, control jumps to a 'fail' label that the
 * function using the macro must provide.
 *
 * The pointer arguments are parenthesized so that arbitrary pointer
 * expressions (for example 'array + 1') expand correctly.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
2018-11-29 20:27:00 +01:00
2023-07-02 19:51:09 +02:00
CURLU *curl_url_dup(const CURLU *in)
2018-11-29 20:27:00 +01:00
{
struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
if(u) {
DUP(u, in, scheme);
DUP(u, in, user);
DUP(u, in, password);
DUP(u, in, options);
DUP(u, in, host);
DUP(u, in, port);
DUP(u, in, path);
DUP(u, in, query);
DUP(u, in, fragment);
u->portnum = in->portnum;
}
return u;
2023-07-02 19:51:09 +02:00
fail:
2018-11-29 20:27:00 +01:00
curl_url_cleanup(u);
return NULL;
}
2023-07-02 19:51:09 +02:00
/*
 * curl_url_get() extracts the part selected by 'what' from the URL handle
 * 'u' and stores a newly allocated string in '*part'.
 *
 * '*part' is set to NULL on entry and stays NULL whenever an error is
 * returned, so a failing call never hands out (or leaks) a buffer.
 *
 * Flags acted upon here:
 *  CURLU_URLDECODE       URL-decode the part (never for scheme and port);
 *                        for the query, '+' is turned into space first
 *  CURLU_URLENCODE       URL-encode the part; for CURLUPART_URL a host
 *                        name that is not a bracketed literal is escaped
 *  CURLU_PUNYCODE        convert a non-ASCII host name using IDN
 *  CURLU_DEFAULT_PORT    provide the scheme's default port if none is set
 *  CURLU_NO_DEFAULT_PORT hide the port if it equals the scheme's default
 *  CURLU_DEFAULT_SCHEME  use DEFAULT_SCHEME for CURLUPART_URL if unset
 *
 * Returns CURLUE_OK on success, CURLUE_BAD_HANDLE if 'u' is NULL,
 * CURLUE_BAD_PARTPOINTER if 'part' is NULL, a part specific CURLUE_NO_*
 * code (or CURLUE_UNKNOWN_PART) if the part is not available,
 * CURLUE_URLDECODE if decoding fails, CURLUE_LACKS_IDN if punycode is
 * needed but IDN support is not built in, or CURLUE_OUT_OF_MEMORY.
 */
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  const char *ptr;
  CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
  char portbuf[7];
  bool urldecode = (flags & CURLU_URLDECODE)?1:0;
  bool urlencode = (flags & CURLU_URLENCODE)?1:0;
  bool punycode = FALSE;
  bool plusdecode = FALSE;
  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  switch(what) {
  case CURLUPART_SCHEME:
    ptr = u->scheme;
    ifmissing = CURLUE_NO_SCHEME;
    urldecode = FALSE; /* never for schemes */
    break;
  case CURLUPART_USER:
    ptr = u->user;
    ifmissing = CURLUE_NO_USER;
    break;
  case CURLUPART_PASSWORD:
    ptr = u->password;
    ifmissing = CURLUE_NO_PASSWORD;
    break;
  case CURLUPART_OPTIONS:
    ptr = u->options;
    ifmissing = CURLUE_NO_OPTIONS;
    break;
  case CURLUPART_HOST:
    ptr = u->host;
    ifmissing = CURLUE_NO_HOST;
    punycode = (flags & CURLU_PUNYCODE)?1:0;
    break;
  case CURLUPART_ZONEID:
    ptr = u->zoneid;
    ifmissing = CURLUE_NO_ZONEID;
    break;
  case CURLUPART_PORT:
    ptr = u->port;
    ifmissing = CURLUE_NO_PORT;
    urldecode = FALSE; /* never for port */
    if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
      /* there's no stored port number, but asked to deliver
         a default one for the scheme */
      const struct Curl_handler *h =
        Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
      if(h) {
        msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
        ptr = portbuf;
      }
    }
    else if(ptr && u->scheme) {
      /* there is a stored port number, but ask to inhibit if
         it matches the default one for the scheme */
      const struct Curl_handler *h =
        Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
      if(h && (h->defport == u->portnum) &&
         (flags & CURLU_NO_DEFAULT_PORT))
        ptr = NULL;
    }
    break;
  case CURLUPART_PATH:
    ptr = u->path;
    if(!ptr)
      /* an unset path is reported as the root */
      ptr = "/";
    break;
  case CURLUPART_QUERY:
    ptr = u->query;
    ifmissing = CURLUE_NO_QUERY;
    plusdecode = urldecode;
    break;
  case CURLUPART_FRAGMENT:
    ptr = u->fragment;
    ifmissing = CURLUE_NO_FRAGMENT;
    break;
  case CURLUPART_URL: {
    char *url;
    char *scheme;
    char *options = u->options;
    char *port = u->port;
    char *allochost = NULL;
    punycode = (flags & CURLU_PUNYCODE)?1:0;
    if(u->scheme && strcasecompare("file", u->scheme)) {
      /* an unset path means the root, the same way the other branch and
         CURLUPART_PATH treat it; never pass NULL for a %s */
      url = aprintf("file://%s%s%s",
                    u->path ? u->path : "/",
                    u->fragment? "#": "",
                    u->fragment? u->fragment : "");
    }
    else if(!u->host)
      return CURLUE_NO_HOST;
    else {
      const struct Curl_handler *h = NULL;
      if(u->scheme)
        scheme = u->scheme;
      else if(flags & CURLU_DEFAULT_SCHEME)
        scheme = (char *) DEFAULT_SCHEME;
      else
        return CURLUE_NO_SCHEME;

      h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
      if(!port && (flags & CURLU_DEFAULT_PORT)) {
        /* there's no stored port number, but asked to deliver
           a default one for the scheme */
        if(h) {
          msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
          port = portbuf;
        }
      }
      else if(port) {
        /* there is a stored port number, but asked to inhibit if it matches
           the default one for the scheme */
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          port = NULL;
      }

      /* only output the options for schemes that use them */
      if(h && !(h->flags & PROTOPT_URLOPTIONS))
        options = NULL;

      if(u->host[0] == '[') {
        if(u->zoneid) {
          /* make it '[ host %25 zoneid ]' */
          struct dynbuf enc;
          size_t hostlen = strlen(u->host);
          Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
          /* hostlen - 1 drops the closing bracket, which is then added
             back after the zone id */
          if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
                           u->zoneid))
            return CURLUE_OUT_OF_MEMORY;
          allochost = Curl_dyn_ptr(&enc);
        }
      }
      else if(urlencode) {
        allochost = curl_easy_escape(NULL, u->host, 0);
        if(!allochost)
          return CURLUE_OUT_OF_MEMORY;
      }
      else if(punycode) {
        if(!Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
          return CURLUE_LACKS_IDN;
#else
          allochost = Curl_idn_decode(u->host);
          if(!allochost)
            return CURLUE_OUT_OF_MEMORY;
#endif
        }
      }

      url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                    scheme,
                    u->user ? u->user : "",
                    u->password ? ":": "",
                    u->password ? u->password : "",
                    options ? ";" : "",
                    options ? options : "",
                    (u->user || u->password || options) ? "@": "",
                    allochost ? allochost : u->host,
                    port ? ":": "",
                    port ? port : "",
                    (u->path && (u->path[0] != '/')) ? "/": "",
                    u->path ? u->path : "/",
                    (u->query && u->query[0]) ? "?": "",
                    (u->query && u->query[0]) ? u->query : "",
                    u->fragment? "#": "",
                    u->fragment? u->fragment : "");
      /* the temporary host is no longer needed, whether or not aprintf()
         succeeded */
      free(allochost);
    }
    if(!url)
      return CURLUE_OUT_OF_MEMORY;
    *part = url;
    return CURLUE_OK;
  }
  default:
    ptr = NULL;
    break;
  }

  if(ptr) {
    size_t partlen = strlen(ptr);
    size_t i = 0;
    *part = Curl_memdup(ptr, partlen + 1);
    if(!*part)
      return CURLUE_OUT_OF_MEMORY;
    if(plusdecode) {
      /* convert + to space */
      char *plus = *part;
      for(i = 0; i < partlen; ++plus, i++) {
        if(*plus == '+')
          *plus = ' ';
      }
    }
    if(urldecode) {
      char *decoded;
      size_t dlen;
      /* this unconditional rejection of control bytes is documented
         API behavior */
      CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
      free(*part);
      if(res) {
        *part = NULL;
        return CURLUE_URLDECODE;
      }
      *part = decoded;
      partlen = dlen;
    }
    if(urlencode) {
      struct dynbuf enc;
      Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
      if(urlencode_str(&enc, *part, partlen, TRUE,
                       what == CURLUPART_QUERY)) {
        /* do not leak the copy or hand it out together with an error */
        free(*part);
        *part = NULL;
        return CURLUE_OUT_OF_MEMORY;
      }
      free(*part);
      *part = Curl_dyn_ptr(&enc);
    }
    else if(punycode) {
      if(!Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
        free(*part);
        *part = NULL;
        return CURLUE_LACKS_IDN;
#else
        char *allochost = Curl_idn_decode(*part);
        if(!allochost) {
          free(*part);
          *part = NULL;
          return CURLUE_OUT_OF_MEMORY;
        }
        free(*part);
        *part = allochost;
#endif
      }
    }

    return CURLUE_OK;
  }
  else
    return ifmissing;
}
/*
 * curl_url_set() sets or clears the part selected by 'what' in the URL
 * handle 'u'.
 *
 * A NULL 'part' clears that part; for CURLUPART_URL it clears the whole
 * handle. A non-NULL 'part' is validated (scheme, port and host name get
 * part specific checks), copied and stored, replacing any previous value.
 *
 * Flags acted upon here:
 *  CURLU_URLENCODE          percent-encode 'part' before storing it (never
 *                           for scheme and port); for the query, space
 *                           becomes '+'
 *  CURLU_APPENDQUERY        append to an existing query, separated by '&';
 *                           the first '=' is then left unencoded
 *  CURLU_NON_SUPPORT_SCHEME accept schemes that are not built in
 *  CURLU_NO_AUTHORITY       accept an empty host name
 *  CURLU_GUESS_SCHEME / CURLU_DEFAULT_SCHEME and the other parser flags are
 *                           passed on when a full URL is set
 *
 * When CURLUPART_URL is set and 'part' is not an absolute URL, it is
 * applied as a relative reference onto the URL currently stored in 'u', if
 * a complete one can be extracted from it.
 *
 * Returns CURLUE_OK on success, CURLUE_BAD_HANDLE if 'u' is NULL,
 * CURLUE_UNKNOWN_PART for an unknown 'what', CURLUE_BAD_SCHEME,
 * CURLUE_UNSUPPORTED_SCHEME, CURLUE_BAD_PORT_NUMBER, CURLUE_BAD_HOSTNAME or
 * CURLUE_MALFORMED_INPUT when the input is rejected, CURLUE_OUT_OF_MEMORY,
 * or whatever error parsing a full URL produced.
 */
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
                       const char *part, unsigned int flags)
{
  char **storep = NULL;
  long port = 0;
  bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
  bool plusencode = FALSE;
  bool urlskipslash = FALSE;
  bool appendquery = FALSE;
  bool equalsencode = FALSE;

  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part) {
    /* setting a part to NULL clears it */
    switch(what) {
    case CURLUPART_URL:
      /* storep stays NULL, which means "clear everything" below */
      break;
    case CURLUPART_SCHEME:
      storep = &u->scheme;
      break;
    case CURLUPART_USER:
      storep = &u->user;
      break;
    case CURLUPART_PASSWORD:
      storep = &u->password;
      break;
    case CURLUPART_OPTIONS:
      storep = &u->options;
      break;
    case CURLUPART_HOST:
      storep = &u->host;
      break;
    case CURLUPART_ZONEID:
      storep = &u->zoneid;
      break;
    case CURLUPART_PORT:
      u->portnum = 0;
      storep = &u->port;
      break;
    case CURLUPART_PATH:
      storep = &u->path;
      break;
    case CURLUPART_QUERY:
      storep = &u->query;
      break;
    case CURLUPART_FRAGMENT:
      storep = &u->fragment;
      break;
    default:
      return CURLUE_UNKNOWN_PART;
    }
    if(storep && *storep) {
      Curl_safefree(*storep);
    }
    else if(!storep) {
      free_urlhandle(u);
      memset(u, 0, sizeof(struct Curl_URL));
    }
    return CURLUE_OK;
  }

  switch(what) {
  case CURLUPART_SCHEME: {
    size_t plen = strlen(part);
    const char *s = part;
    if((plen > MAX_SCHEME_LEN) || (plen < 1))
      /* too long or too short */
      return CURLUE_BAD_SCHEME;
    if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
       /* verify that it is a fine scheme */
       !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
      return CURLUE_UNSUPPORTED_SCHEME;
    storep = &u->scheme;
    urlencode = FALSE; /* never */
    /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
    if(!((('a' <= *s) && (*s <= 'z')) || (('A' <= *s) && (*s <= 'Z'))))
      /* the first character must be a letter */
      return CURLUE_BAD_SCHEME;
    while(plen--) {
      if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
        s++; /* fine */
      else
        return CURLUE_BAD_SCHEME;
    }
    break;
  }
  case CURLUPART_USER:
    storep = &u->user;
    break;
  case CURLUPART_PASSWORD:
    storep = &u->password;
    break;
  case CURLUPART_OPTIONS:
    storep = &u->options;
    break;
  case CURLUPART_HOST:
    storep = &u->host;
    /* a zone id belongs to the previous host name */
    Curl_safefree(u->zoneid);
    break;
  case CURLUPART_ZONEID:
    storep = &u->zoneid;
    break;
  case CURLUPART_PORT:
  {
    char *endp;
    urlencode = FALSE; /* never */
    if((part[0] < '0') || (part[0] > '9'))
      /* strtol() skips leading whitespace and accepts a sign, but the
         string is stored as given so it must be nothing but digits */
      return CURLUE_BAD_PORT_NUMBER;
    port = strtol(part, &endp, 10);  /* Port number must be decimal */
    if((port <= 0) || (port > 0xffff))
      return CURLUE_BAD_PORT_NUMBER;
    if(*endp)
      /* weirdly provided number, not good! */
      return CURLUE_BAD_PORT_NUMBER;
    storep = &u->port;
  }
  break;
  case CURLUPART_PATH:
    urlskipslash = TRUE;
    storep = &u->path;
    break;
  case CURLUPART_QUERY:
    plusencode = urlencode;
    appendquery = (flags & CURLU_APPENDQUERY)?1:0;
    equalsencode = appendquery;
    storep = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    storep = &u->fragment;
    break;
  case CURLUPART_URL: {
    /*
     * Allow a new URL to replace the existing (if any) contents.
     *
     * If the existing contents is enough for a URL, allow a relative URL to
     * replace it.
     */
    CURLUcode result;
    char *oldurl;
    char *redired_url;

    /* if the new thing is absolute or the old one is not
     * (we could not get an absolute url in 'oldurl'),
     * then replace the existing with the new. */
    if(Curl_is_absolute_url(part, NULL, 0,
                            flags & (CURLU_GUESS_SCHEME|
                                     CURLU_DEFAULT_SCHEME))
       || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
      return parseurl_and_replace(part, u, flags);
    }

    /* apply the relative part to create a new URL
     * and replace the existing one with it. */
    redired_url = concat_url(oldurl, part);
    free(oldurl);
    if(!redired_url)
      return CURLUE_OUT_OF_MEMORY;

    result = parseurl_and_replace(redired_url, u, flags);
    free(redired_url);
    return result;
  }
  default:
    return CURLUE_UNKNOWN_PART;
  }
  DEBUGASSERT(storep);
  {
    const char *newp = part;
    size_t nalloc = strlen(part);

    if(nalloc > CURL_MAX_INPUT_LENGTH)
      /* excessive input length */
      return CURLUE_MALFORMED_INPUT;

    if(urlencode) {
      const unsigned char *i;
      struct dynbuf enc;

      /* every input byte becomes at most three output bytes */
      Curl_dyn_init(&enc, nalloc * 3 + 1);
      for(i = (const unsigned char *)part; *i; i++) {
        CURLcode result;
        if((*i == ' ') && plusencode) {
          result = Curl_dyn_addn(&enc, "+", 1);
          if(result)
            return CURLUE_OUT_OF_MEMORY;
        }
        else if(Curl_isunreserved(*i) ||
                ((*i == '/') && urlskipslash) ||
                ((*i == '=') && equalsencode)) {
          if((*i == '=') && equalsencode)
            /* only skip the first equals sign */
            equalsencode = FALSE;
          result = Curl_dyn_addn(&enc, i, 1);
          if(result)
            return CURLUE_OUT_OF_MEMORY;
        }
        else {
          char out[3]={'%'};
          out[1] = hexdigits[*i>>4];
          out[2] = hexdigits[*i & 0xf];
          result = Curl_dyn_addn(&enc, out, 3);
          if(result)
            return CURLUE_OUT_OF_MEMORY;
        }
      }
      newp = Curl_dyn_ptr(&enc);
      if(!newp) {
        /* An empty 'part' adds nothing to the dynbuf.
           NOTE(review): Curl_dyn_ptr() appears to return NULL in that case
           - confirm. Store an empty string like the non-encoding branch
           does, so that the code below never gets a NULL string. */
        newp = strdup("");
        if(!newp)
          return CURLUE_OUT_OF_MEMORY;
      }
    }
    else {
      char *p;
      newp = strdup(part);
      if(!newp)
        return CURLUE_OUT_OF_MEMORY;
      p = (char *)newp;
      while(*p) {
        /* make sure percent encoded are lower case */
        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
          p[1] = Curl_raw_tolower(p[1]);
          p[2] = Curl_raw_tolower(p[2]);
          p += 3;
        }
        else
          p++;
      }
    }

    if(appendquery) {
      /* Append the 'newp' string onto the old query. Add a '&' separator if
         none is present at the end of the existing query already */

      size_t querylen = u->query ? strlen(u->query) : 0;
      bool addamperand = querylen && (u->query[querylen -1] != '&');
      if(querylen) {
        struct dynbuf enc;
        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);

        if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
          goto nomem;

        if(addamperand) {
          if(Curl_dyn_addn(&enc, "&", 1))
            goto nomem;
        }
        if(Curl_dyn_add(&enc, newp))
          goto nomem;
        free((char *)newp);
        free(*storep);
        *storep = Curl_dyn_ptr(&enc);
        return CURLUE_OK;
        nomem:
        free((char *)newp);
        return CURLUE_OUT_OF_MEMORY;
      }
      /* with no existing query, the new one is stored like any other part */
    }

    if(what == CURLUPART_HOST) {
      size_t n = strlen(newp);
      if(!n && (flags & CURLU_NO_AUTHORITY)) {
        /* Skip hostname check, it's allowed to be empty. */
      }
      else {
        if(!n || hostname_check(u, (char *)newp, n)) {
          free((char *)newp);
          return CURLUE_BAD_HOSTNAME;
        }
      }
    }

    free(*storep);
    *storep = (char *)newp;
  }
  /* set after the string, to make it not assigned if the allocation above
     fails */
  if(port)
    u->portnum = port;
  return CURLUE_OK;
}