Blame - util/uri.c - qemu

blob: e348c1768c4adbd3a7bbdf1a40da2a3632306de8 [file] [log] [blame]

Paolo Bonzini	ca0defb	2012-09-24 14:42:02 +0530	[diff] [blame]	1	/**
				2	* uri.c: set of generic URI related routines
				3	*
				4	* Reference: RFCs 3986, 2732 and 2373
				5	*
				6	* Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved.
				7	*
				8	* Permission is hereby granted, free of charge, to any person obtaining a copy
				9	* of this software and associated documentation files (the "Software"), to deal
				10	* in the Software without restriction, including without limitation the rights
				11	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				12	* copies of the Software, and to permit persons to whom the Software is
				13	* furnished to do so, subject to the following conditions:
				14	*
				15	* The above copyright notice and this permission notice shall be included in
				16	* all copies or substantial portions of the Software.
				17	*
				18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				19	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				20	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				21	* DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
				22	* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				23	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
				24	*
				25	* Except as contained in this notice, the name of Daniel Veillard shall not
				26	* be used in advertising or otherwise to promote the sale, use or other
				27	* dealings in this Software without prior written authorization from him.
				28	*
				29	* daniel@veillard.com
				30	*
				31	**
				32	*
				33	* Copyright (C) 2007, 2009-2010 Red Hat, Inc.
				34	*
				35	* This library is free software; you can redistribute it and/or
				36	* modify it under the terms of the GNU Lesser General Public
				37	* License as published by the Free Software Foundation; either
				38	* version 2.1 of the License, or (at your option) any later version.
				39	*
				40	* This library is distributed in the hope that it will be useful,
				41	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				42	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				43	* Lesser General Public License for more details.
				44	*
				45	* You should have received a copy of the GNU Lesser General Public
				46	* License along with this library; if not, write to the Free Software
				47	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				48	*
				49	* Authors:
				50	* Richard W.M. Jones <rjones@redhat.com>
				51	*
				52	*/
				53
				54	#include <glib.h>
				55	#include <string.h>
				56	#include <stdio.h>
				57
Paolo Bonzini	1de7afc	2012-12-17 18:20:00 +0100	[diff] [blame]	58	#include "qemu/uri.h"
Paolo Bonzini	ca0defb	2012-09-24 14:42:02 +0530	[diff] [blame]	59
				60	static void uri_clean(URI *uri);
				61
				62	/*
				63	* Old rule from 2396 used in legacy handling code
				64	* alpha = lowalpha \| upalpha
				65	*/
				66	#define IS_ALPHA(x) (IS_LOWALPHA(x) \|\| IS_UPALPHA(x))
				67
				68
				69	/*
				70	* lowalpha = "a" \| "b" \| "c" \| "d" \| "e" \| "f" \| "g" \| "h" \| "i" \| "j" \|
				71	* "k" \| "l" \| "m" \| "n" \| "o" \| "p" \| "q" \| "r" \| "s" \| "t" \|
				72	* "u" \| "v" \| "w" \| "x" \| "y" \| "z"
				73	*/
				74
				75	#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
				76
				77	/*
				78	* upalpha = "A" \| "B" \| "C" \| "D" \| "E" \| "F" \| "G" \| "H" \| "I" \| "J" \|
				79	* "K" \| "L" \| "M" \| "N" \| "O" \| "P" \| "Q" \| "R" \| "S" \| "T" \|
				80	* "U" \| "V" \| "W" \| "X" \| "Y" \| "Z"
				81	*/
				82	#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
				83
				84	#ifdef IS_DIGIT
				85	#undef IS_DIGIT
				86	#endif
				87	/*
				88	* digit = "0" \| "1" \| "2" \| "3" \| "4" \| "5" \| "6" \| "7" \| "8" \| "9"
				89	*/
				90	#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
				91
				92	/*
				93	* alphanum = alpha \| digit
				94	*/
				95
				96	#define IS_ALPHANUM(x) (IS_ALPHA(x) \|\| IS_DIGIT(x))
				97
				98	/*
				99	* mark = "-" \| "_" \| "." \| "!" \| "~" \| "*" \| "'" \| "(" \| ")"
				100	*/
				101
				102	#define IS_MARK(x) (((x) == '-') \|\| ((x) == '_') \|\| ((x) == '.') \|\| \
				103	((x) == '!') \|\| ((x) == '~') \|\| ((x) == '*') \|\| ((x) == '\'') \|\| \
				104	((x) == '(') \|\| ((x) == ')'))
				105
				106	/*
				107	* unwise = "{" \| "}" \| "\|" \| "\" \| "^" \| "`"
				108	*/
				109
				110	#define IS_UNWISE(p) \
				111	((((p) == '{')) \|\| (((p) == '}')) \|\| ((*(p) == '\|')) \|\| \
				112	(((p) == '\\')) \|\| (((p) == '^')) \|\| ((*(p) == '[')) \|\| \
				113	(((p) == ']')) \|\| (((p) == '`')))
				114	/*
				115	* reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \| "$" \| "," \|
				116	* "[" \| "]"
				117	*/
				118
				119	#define IS_RESERVED(x) (((x) == ';') \|\| ((x) == '/') \|\| ((x) == '?') \|\| \
				120	((x) == ':') \|\| ((x) == '@') \|\| ((x) == '&') \|\| ((x) == '=') \|\| \
				121	((x) == '+') \|\| ((x) == '$') \|\| ((x) == ',') \|\| ((x) == '[') \|\| \
				122	((x) == ']'))
				123
				124	/*
				125	* unreserved = alphanum \| mark
				126	*/
				127
				128	#define IS_UNRESERVED(x) (IS_ALPHANUM(x) \|\| IS_MARK(x))
				129
				130	/*
				131	* Skip to next pointer char, handle escaped sequences
				132	*/
				133
				134	#define NEXT(p) ((*p == '%')? p += 3 : p++)
				135
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */
				145
				146
				147	/************************************************************************
				148	* *
				149	* RFC 3986 parser *
				150	* *
				151	************************************************************************/
				152
				153	#define ISA_DIGIT(p) (((p) >= '0') && ((p) <= '9'))
				154	#define ISA_ALPHA(p) ((((p) >= 'a') && ((p) <= 'z')) \|\| \
				155	(((p) >= 'A') && ((p) <= 'Z')))
				156	#define ISA_HEXDIG(p) \
				157	(ISA_DIGIT(p) \|\| (((p) >= 'a') && ((p) <= 'f')) \|\| \
				158	(((p) >= 'A') && ((p) <= 'F')))
				159
				160	/*
				161	* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
				162	* / "*" / "+" / "," / ";" / "="
				163	*/
				164	#define ISA_SUB_DELIM(p) \
				165	((((p) == '!')) \|\| (((p) == '$')) \|\| ((*(p) == '&')) \|\| \
				166	(((p) == '(')) \|\| (((p) == ')')) \|\| (((p) == '')) \|\| \
				167	(((p) == '+')) \|\| (((p) == ',')) \|\| ((*(p) == ';')) \|\| \
				168	(((p) == '=')) \|\| (((p) == '\'')))
				169
				170	/*
				171	* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
				172	*/
				173	#define ISA_GEN_DELIM(p) \
				174	((((p) == ':')) \|\| (((p) == '/')) \|\| ((*(p) == '?')) \|\| \
				175	(((p) == '#')) \|\| (((p) == '[')) \|\| ((*(p) == ']')) \|\| \
				176	((*(p) == '@')))
				177
				178	/*
				179	* reserved = gen-delims / sub-delims
				180	*/
				181	#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) \|\| (ISA_SUB_DELIM(p)))
				182
				183	/*
				184	* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
				185	*/
				186	#define ISA_UNRESERVED(p) \
				187	((ISA_ALPHA(p)) \|\| (ISA_DIGIT(p)) \|\| ((*(p) == '-')) \|\| \
				188	(((p) == '.')) \|\| (((p) == '_')) \|\| ((*(p) == '~')))
				189
				190	/*
				191	* pct-encoded = "%" HEXDIG HEXDIG
				192	*/
				193	#define ISA_PCT_ENCODED(p) \
				194	((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
				195
				196	/*
				197	* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
				198	*/
				199	#define ISA_PCHAR(p) \
				200	(ISA_UNRESERVED(p) \|\| ISA_PCT_ENCODED(p) \|\| ISA_SUB_DELIM(p) \|\| \
				201	(((p) == ':')) \|\| (((p) == '@')))
				202
				203	/**
				204	* rfc3986_parse_scheme:
				205	* @uri: pointer to an URI structure
				206	* @str: pointer to the string to analyze
				207	*
				208	* Parse an URI scheme
				209	*
				210	* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
				211	*
				212	* Returns 0 or the error code
				213	*/
				214	static int
				215	rfc3986_parse_scheme(URI uri, const char *str) {
				216	const char *cur;
				217
				218	if (str == NULL)
				219	return(-1);
				220
				221	cur = *str;
				222	if (!ISA_ALPHA(cur))
				223	return(2);
				224	cur++;
				225	while (ISA_ALPHA(cur) \|\| ISA_DIGIT(cur) \|\|
				226	(cur == '+') \|\| (cur == '-') \|\| (*cur == '.')) cur++;
				227	if (uri != NULL) {
				228	if (uri->scheme != NULL) g_free(uri->scheme);
				229	uri->scheme = g_strndup(str, cur - str);
				230	}
				231	*str = cur;
				232	return(0);
				233	}
				234
				235	/**
				236	* rfc3986_parse_fragment:
				237	* @uri: pointer to an URI structure
				238	* @str: pointer to the string to analyze
				239	*
				240	* Parse the query part of an URI
				241	*
				242	* fragment = *( pchar / "/" / "?" )
				243	* NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
				244	* in the fragment identifier but this is used very broadly for
				245	* xpointer scheme selection, so we are allowing it here to not break
				246	* for example all the DocBook processing chains.
				247	*
				248	* Returns 0 or the error code
				249	*/
				250	static int
				251	rfc3986_parse_fragment(URI uri, const char *str)
				252	{
				253	const char *cur;
				254
				255	if (str == NULL)
				256	return (-1);
				257
				258	cur = *str;
				259
				260	while ((ISA_PCHAR(cur)) \|\| (cur == '/') \|\| (cur == '?') \|\|
				261	(cur == '[') \|\| (cur == ']') \|\|
				262	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
				263	NEXT(cur);
				264	if (uri != NULL) {
				265	if (uri->fragment != NULL)
				266	g_free(uri->fragment);
				267	if (uri->cleanup & 2)
				268	uri->fragment = g_strndup(str, cur - str);
				269	else
				270	uri->fragment = uri_string_unescape(str, cur - str, NULL);
				271	}
				272	*str = cur;
				273	return (0);
				274	}
				275
				276	/**
				277	* rfc3986_parse_query:
				278	* @uri: pointer to an URI structure
				279	* @str: pointer to the string to analyze
				280	*
				281	* Parse the query part of an URI
				282	*
				283	* query = *uric
				284	*
				285	* Returns 0 or the error code
				286	*/
				287	static int
				288	rfc3986_parse_query(URI uri, const char *str)
				289	{
				290	const char *cur;
				291
				292	if (str == NULL)
				293	return (-1);
				294
				295	cur = *str;
				296
				297	while ((ISA_PCHAR(cur)) \|\| (cur == '/') \|\| (cur == '?') \|\|
				298	((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
				299	NEXT(cur);
				300	if (uri != NULL) {
				301	if (uri->query != NULL)
				302	g_free (uri->query);
				303	uri->query = g_strndup (str, cur - str);
				304	}
				305	*str = cur;
				306	return (0);
				307	}
				308
				309	/**
				310	* rfc3986_parse_port:
				311	* @uri: pointer to an URI structure
				312	* @str: the string to analyze
				313	*
				314	* Parse a port part and fills in the appropriate fields
				315	* of the @uri structure
				316	*
				317	* port = *DIGIT
				318	*
				319	* Returns 0 or the error code
				320	*/
				321	static int
				322	rfc3986_parse_port(URI uri, const char *str)
				323	{
				324	const char cur = str;
				325
				326	if (ISA_DIGIT(cur)) {
				327	if (uri != NULL)
				328	uri->port = 0;
				329	while (ISA_DIGIT(cur)) {
				330	if (uri != NULL)
				331	uri->port = uri->port * 10 + (*cur - '0');
				332	cur++;
				333	}
				334	*str = cur;
				335	return(0);
				336	}
				337	return(1);
				338	}
				339
				340	/**
				341	* rfc3986_parse_user_info:
				342	* @uri: pointer to an URI structure
				343	* @str: the string to analyze
				344	*
				345	* Parse an user informations part and fills in the appropriate fields
				346	* of the @uri structure
				347	*
				348	* userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
				349	*
				350	* Returns 0 or the error code
				351	*/
				352	static int
				353	rfc3986_parse_user_info(URI uri, const char *str)
				354	{
				355	const char *cur;
				356
				357	cur = *str;
				358	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\|
				359	ISA_SUB_DELIM(cur) \|\| (*cur == ':'))
				360	NEXT(cur);
				361	if (*cur == '@') {
				362	if (uri != NULL) {
				363	if (uri->user != NULL) g_free(uri->user);
				364	if (uri->cleanup & 2)
				365	uri->user = g_strndup(str, cur - str);
				366	else
				367	uri->user = uri_string_unescape(str, cur - str, NULL);
				368	}
				369	*str = cur;
				370	return(0);
				371	}
				372	return(1);
				373	}
				374
				375	/**
				376	* rfc3986_parse_dec_octet:
				377	* @str: the string to analyze
				378	*
				379	* dec-octet = DIGIT ; 0-9
				380	* / %x31-39 DIGIT ; 10-99
				381	* / "1" 2DIGIT ; 100-199
				382	* / "2" %x30-34 DIGIT ; 200-249
				383	* / "25" %x30-35 ; 250-255
				384	*
				385	* Skip a dec-octet.
				386	*
				387	* Returns 0 if found and skipped, 1 otherwise
				388	*/
				389	static int
				390	rfc3986_parse_dec_octet(const char **str) {
				391	const char cur = str;
				392
				393	if (!(ISA_DIGIT(cur)))
				394	return(1);
				395	if (!ISA_DIGIT(cur+1))
				396	cur++;
				397	else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
				398	cur += 2;
				399	else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
				400	cur += 3;
				401	else if ((cur == '2') && ((cur + 1) >= '0') &&
				402	(*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
				403	cur += 3;
				404	else if ((cur == '2') && ((cur + 1) == '5') &&
				405	((cur + 2) >= '0') && ((cur + 1) <= '5'))
				406	cur += 3;
				407	else
				408	return(1);
				409	*str = cur;
				410	return(0);
				411	}
				412	/**
				413	* rfc3986_parse_host:
				414	* @uri: pointer to an URI structure
				415	* @str: the string to analyze
				416	*
				417	* Parse an host part and fills in the appropriate fields
				418	* of the @uri structure
				419	*
				420	* host = IP-literal / IPv4address / reg-name
				421	* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
				422	* IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
				423	* reg-name = *( unreserved / pct-encoded / sub-delims )
				424	*
				425	* Returns 0 or the error code
				426	*/
				427	static int
				428	rfc3986_parse_host(URI uri, const char *str)
				429	{
				430	const char cur = str;
				431	const char *host;
				432
				433	host = cur;
				434	/*
Stefan Weil	a93cf9d	2012-11-02 08:29:53 +0100	[diff] [blame]	435	* IPv6 and future addressing scheme are enclosed between brackets
Paolo Bonzini	ca0defb	2012-09-24 14:42:02 +0530	[diff] [blame]	436	*/
				437	if (*cur == '[') {
				438	cur++;
				439	while ((cur != ']') && (cur != 0))
				440	cur++;
				441	if (*cur != ']')
				442	return(1);
				443	cur++;
				444	goto found;
				445	}
				446	/*
				447	* try to parse an IPv4
				448	*/
				449	if (ISA_DIGIT(cur)) {
				450	if (rfc3986_parse_dec_octet(&cur) != 0)
				451	goto not_ipv4;
				452	if (*cur != '.')
				453	goto not_ipv4;
				454	cur++;
				455	if (rfc3986_parse_dec_octet(&cur) != 0)
				456	goto not_ipv4;
				457	if (*cur != '.')
				458	goto not_ipv4;
				459	if (rfc3986_parse_dec_octet(&cur) != 0)
				460	goto not_ipv4;
				461	if (*cur != '.')
				462	goto not_ipv4;
				463	if (rfc3986_parse_dec_octet(&cur) != 0)
				464	goto not_ipv4;
				465	goto found;
				466	not_ipv4:
				467	cur = *str;
				468	}
				469	/*
				470	* then this should be a hostname which can be empty
				471	*/
				472	while (ISA_UNRESERVED(cur) \|\| ISA_PCT_ENCODED(cur) \|\| ISA_SUB_DELIM(cur))
				473	NEXT(cur);
				474	found:
				475	if (uri != NULL) {
				476	if (uri->authority != NULL) g_free(uri->authority);
				477	uri->authority = NULL;
				478	if (uri->server != NULL) g_free(uri->server);
				479	if (cur != host) {
				480	if (uri->cleanup & 2)
				481	uri->server = g_strndup(host, cur - host);
				482	else
				483	uri->server = uri_string_unescape(host, cur - host, NULL);
				484	} else
				485	uri->server = NULL;
				486	}
				487	*str = cur;
				488	return(0);
				489	}
				490
				491	/**
				492	* rfc3986_parse_authority:
				493	* @uri: pointer to an URI structure
				494	* @str: the string to analyze
				495	*
				496	* Parse an authority part and fills in the appropriate fields
				497	* of the @uri structure
				498	*
				499	* authority = [ userinfo "@" ] host [ ":" port ]
				500	*
				501	* Returns 0 or the error code
				502	*/
				503	static int
				504	rfc3986_parse_authority(URI uri, const char *str)
				505	{
				506	const char *cur;
				507	int ret;
				508
				509	cur = *str;
				510	/*
				511	* try to parse an userinfo and check for the trailing @
				512	*/
				513	ret = rfc3986_parse_user_info(uri, &cur);
				514	if ((ret != 0) \|\| (*cur != '@'))
				515	cur = *str;
				516	else
				517	cur++;
				518	ret = rfc3986_parse_host(uri, &cur);
				519	if (ret != 0) return(ret);
				520	if (*cur == ':') {
				521	cur++;
				522	ret = rfc3986_parse_port(uri, &cur);
				523	if (ret != 0) return(ret);
				524	}
				525	*str = cur;
				526	return(0);
				527	}
				528
				529	/**
				530	* rfc3986_parse_segment:
				531	* @str: the string to analyze
				532	* @forbid: an optional forbidden character
				533	* @empty: allow an empty segment
				534	*
				535	* Parse a segment and fills in the appropriate fields
				536	* of the @uri structure
				537	*
				538	* segment = *pchar
				539	* segment-nz = 1*pchar
				540	* segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
				541	* ; non-zero-length segment without any colon ":"
				542	*
				543	* Returns 0 or the error code
				544	*/
				545	static int
				546	rfc3986_parse_segment(const char **str, char forbid, int empty)
				547	{
				548	const char *cur;
				549
				550	cur = *str;
				551	if (!ISA_PCHAR(cur)) {
				552	if (empty)
				553	return(0);
				554	return(1);
				555	}
				556	while (ISA_PCHAR(cur) && (*cur != forbid))
				557	NEXT(cur);
				558	*str = cur;
				559	return (0);
				560	}
				561
				562	/**
				563	* rfc3986_parse_path_ab_empty:
				564	* @uri: pointer to an URI structure
				565	* @str: the string to analyze
				566	*
				567	* Parse an path absolute or empty and fills in the appropriate fields
				568	* of the @uri structure
				569	*
				570	* path-abempty = *( "/" segment )
				571	*
				572	* Returns 0 or the error code
				573	*/
				574	static int
				575	rfc3986_parse_path_ab_empty(URI uri, const char *str)
				576	{
				577	const char *cur;
				578	int ret;
				579
				580	cur = *str;
				581
				582	while (*cur == '/') {
				583	cur++;
				584	ret = rfc3986_parse_segment(&cur, 0, 1);
				585	if (ret != 0) return(ret);
				586	}
				587	if (uri != NULL) {
				588	if (uri->path != NULL) g_free(uri->path);
				589	if (*str != cur) {
				590	if (uri->cleanup & 2)
				591	uri->path = g_strndup(str, cur - str);
				592	else
				593	uri->path = uri_string_unescape(str, cur - str, NULL);
				594	} else {
				595	uri->path = NULL;
				596	}
				597	}
				598	*str = cur;
				599	return (0);
				600	}
				601
				602	/**
				603	* rfc3986_parse_path_absolute:
				604	* @uri: pointer to an URI structure
				605	* @str: the string to analyze
				606	*
				607	* Parse an path absolute and fills in the appropriate fields
				608	* of the @uri structure
				609	*
				610	* path-absolute = "/" [ segment-nz *( "/" segment ) ]
				611	*
				612	* Returns 0 or the error code
				613	*/
				614	static int
				615	rfc3986_parse_path_absolute(URI uri, const char *str)
				616	{
				617	const char *cur;
				618	int ret;
				619
				620	cur = *str;
				621
				622	if (*cur != '/')
				623	return(1);
				624	cur++;
				625	ret = rfc3986_parse_segment(&cur, 0, 0);
				626	if (ret == 0) {
				627	while (*cur == '/') {
				628	cur++;
				629	ret = rfc3986_parse_segment(&cur, 0, 1);
				630	if (ret != 0) return(ret);
				631	}
				632	}
				633	if (uri != NULL) {
				634	if (uri->path != NULL) g_free(uri->path);
				635	if (cur != *str) {
				636	if (uri->cleanup & 2)
				637	uri->path = g_strndup(str, cur - str);
				638	else
				639	uri->path = uri_string_unescape(str, cur - str, NULL);
				640	} else {
				641	uri->path = NULL;
				642	}
				643	}
				644	*str = cur;
				645	return (0);
				646	}
				647
				648	/**
				649	* rfc3986_parse_path_rootless:
				650	* @uri: pointer to an URI structure
				651	* @str: the string to analyze
				652	*
				653	* Parse an path without root and fills in the appropriate fields
				654	* of the @uri structure
				655	*
				656	* path-rootless = segment-nz *( "/" segment )
				657	*
				658	* Returns 0 or the error code
				659	*/
				660	static int
				661	rfc3986_parse_path_rootless(URI uri, const char *str)
				662	{
				663	const char *cur;
				664	int ret;
				665
				666	cur = *str;
				667
				668	ret = rfc3986_parse_segment(&cur, 0, 0);
				669	if (ret != 0) return(ret);
				670	while (*cur == '/') {
				671	cur++;
				672	ret = rfc3986_parse_segment(&cur, 0, 1);
				673	if (ret != 0) return(ret);
				674	}
				675	if (uri != NULL) {
				676	if (uri->path != NULL) g_free(uri->path);
				677	if (cur != *str) {
				678	if (uri->cleanup & 2)
				679	uri->path = g_strndup(str, cur - str);
				680	else
				681	uri->path = uri_string_unescape(str, cur - str, NULL);
				682	} else {
				683	uri->path = NULL;
				684	}
				685	}
				686	*str = cur;
				687	return (0);
				688	}
				689
				690	/**
				691	* rfc3986_parse_path_no_scheme:
				692	* @uri: pointer to an URI structure
				693	* @str: the string to analyze
				694	*
				695	* Parse an path which is not a scheme and fills in the appropriate fields
				696	* of the @uri structure
				697	*
				698	* path-noscheme = segment-nz-nc *( "/" segment )
				699	*
				700	* Returns 0 or the error code
				701	*/
				702	static int
				703	rfc3986_parse_path_no_scheme(URI uri, const char *str)
				704	{
				705	const char *cur;
				706	int ret;
				707
				708	cur = *str;
				709
				710	ret = rfc3986_parse_segment(&cur, ':', 0);
				711	if (ret != 0) return(ret);
				712	while (*cur == '/') {
				713	cur++;
				714	ret = rfc3986_parse_segment(&cur, 0, 1);
				715	if (ret != 0) return(ret);
				716	}
				717	if (uri != NULL) {
				718	if (uri->path != NULL) g_free(uri->path);
				719	if (cur != *str) {
				720	if (uri->cleanup & 2)
				721	uri->path = g_strndup(str, cur - str);
				722	else
				723	uri->path = uri_string_unescape(str, cur - str, NULL);
				724	} else {
				725	uri->path = NULL;
				726	}
				727	}
				728	*str = cur;
				729	return (0);
				730	}
				731
				732	/**
				733	* rfc3986_parse_hier_part:
				734	* @uri: pointer to an URI structure
				735	* @str: the string to analyze
				736	*
				737	* Parse an hierarchical part and fills in the appropriate fields
				738	* of the @uri structure
				739	*
				740	* hier-part = "//" authority path-abempty
				741	* / path-absolute
				742	* / path-rootless
				743	* / path-empty
				744	*
				745	* Returns 0 or the error code
				746	*/
				747	static int
				748	rfc3986_parse_hier_part(URI uri, const char *str)
				749	{
				750	const char *cur;
				751	int ret;
				752
				753	cur = *str;
				754
				755	if ((cur == '/') && ((cur + 1) == '/')) {
				756	cur += 2;
				757	ret = rfc3986_parse_authority(uri, &cur);
				758	if (ret != 0) return(ret);
				759	ret = rfc3986_parse_path_ab_empty(uri, &cur);
				760	if (ret != 0) return(ret);
				761	*str = cur;
				762	return(0);
				763	} else if (*cur == '/') {
				764	ret = rfc3986_parse_path_absolute(uri, &cur);
				765	if (ret != 0) return(ret);
				766	} else if (ISA_PCHAR(cur)) {
				767	ret = rfc3986_parse_path_rootless(uri, &cur);
				768	if (ret != 0) return(ret);
				769	} else {
				770	/* path-empty is effectively empty */
				771	if (uri != NULL) {
				772	if (uri->path != NULL) g_free(uri->path);
				773	uri->path = NULL;
				774	}
				775	}
				776	*str = cur;
				777	return (0);
				778	}
				779
				780	/**
				781	* rfc3986_parse_relative_ref:
				782	* @uri: pointer to an URI structure
				783	* @str: the string to analyze
				784	*
				785	* Parse an URI string and fills in the appropriate fields
				786	* of the @uri structure
				787	*
				788	* relative-ref = relative-part [ "?" query ] [ "#" fragment ]
				789	* relative-part = "//" authority path-abempty
				790	* / path-absolute
				791	* / path-noscheme
				792	* / path-empty
				793	*
				794	* Returns 0 or the error code
				795	*/
				796	static int
				797	rfc3986_parse_relative_ref(URI uri, const char str) {
				798	int ret;
				799
				800	if ((str == '/') && ((str + 1) == '/')) {
				801	str += 2;
				802	ret = rfc3986_parse_authority(uri, &str);
				803	if (ret != 0) return(ret);
				804	ret = rfc3986_parse_path_ab_empty(uri, &str);
				805	if (ret != 0) return(ret);
				806	} else if (*str == '/') {
				807	ret = rfc3986_parse_path_absolute(uri, &str);
				808	if (ret != 0) return(ret);
				809	} else if (ISA_PCHAR(str)) {
				810	ret = rfc3986_parse_path_no_scheme(uri, &str);
				811	if (ret != 0) return(ret);
				812	} else {
				813	/* path-empty is effectively empty */
				814	if (uri != NULL) {
				815	if (uri->path != NULL) g_free(uri->path);
				816	uri->path = NULL;
				817	}
				818	}
				819
				820	if (*str == '?') {
				821	str++;
				822	ret = rfc3986_parse_query(uri, &str);
				823	if (ret != 0) return(ret);
				824	}
				825	if (*str == '#') {
				826	str++;
				827	ret = rfc3986_parse_fragment(uri, &str);
				828	if (ret != 0) return(ret);
				829	}
				830	if (*str != 0) {
				831	uri_clean(uri);
				832	return(1);
				833	}
				834	return(0);
				835	}
				836
				837
				838	/**
				839	* rfc3986_parse:
				840	* @uri: pointer to an URI structure
				841	* @str: the string to analyze
				842	*
				843	* Parse an URI string and fills in the appropriate fields
				844	* of the @uri structure
				845	*
				846	* scheme ":" hier-part [ "?" query ] [ "#" fragment ]
				847	*
				848	* Returns 0 or the error code
				849	*/
				850	static int
				851	rfc3986_parse(URI uri, const char str) {
				852	int ret;
				853
				854	ret = rfc3986_parse_scheme(uri, &str);
				855	if (ret != 0) return(ret);
				856	if (*str != ':') {
				857	return(1);
				858	}
				859	str++;
				860	ret = rfc3986_parse_hier_part(uri, &str);
				861	if (ret != 0) return(ret);
				862	if (*str == '?') {
				863	str++;
				864	ret = rfc3986_parse_query(uri, &str);
				865	if (ret != 0) return(ret);
				866	}
				867	if (*str == '#') {
				868	str++;
				869	ret = rfc3986_parse_fragment(uri, &str);
				870	if (ret != 0) return(ret);
				871	}
				872	if (*str != 0) {
				873	uri_clean(uri);
				874	return(1);
				875	}
				876	return(0);
				877	}
				878
				879	/**
				880	* rfc3986_parse_uri_reference:
				881	* @uri: pointer to an URI structure
				882	* @str: the string to analyze
				883	*
				884	* Parse an URI reference string and fills in the appropriate fields
				885	* of the @uri structure
				886	*
				887	* URI-reference = URI / relative-ref
				888	*
				889	* Returns 0 or the error code
				890	*/
				891	static int
				892	rfc3986_parse_uri_reference(URI uri, const char str) {
				893	int ret;
				894
				895	if (str == NULL)
				896	return(-1);
				897	uri_clean(uri);
				898
				899	/*
				900	* Try first to parse absolute refs, then fallback to relative if
				901	* it fails.
				902	*/
				903	ret = rfc3986_parse(uri, str);
				904	if (ret != 0) {
				905	uri_clean(uri);
				906	ret = rfc3986_parse_relative_ref(uri, str);
				907	if (ret != 0) {
				908	uri_clean(uri);
				909	return(ret);
				910	}
				911	}
				912	return(0);
				913	}
				914
				915	/**
				916	* uri_parse:
				917	* @str: the URI string to analyze
				918	*
				919	* Parse an URI based on RFC 3986
				920	*
				921	* URI-reference = [ absoluteURI \| relativeURI ] [ "#" fragment ]
				922	*
				923	* Returns a newly built URI or NULL in case of error
				924	*/
				925	URI *
				926	uri_parse(const char *str) {
				927	URI *uri;
				928	int ret;
				929
				930	if (str == NULL)
				931	return(NULL);
				932	uri = uri_new();
				933	if (uri != NULL) {
				934	ret = rfc3986_parse_uri_reference(uri, str);
				935	if (ret) {
				936	uri_free(uri);
				937	return(NULL);
				938	}
				939	}
				940	return(uri);
				941	}
				942
				943	/**
				944	* uri_parse_into:
				945	* @uri: pointer to an URI structure
				946	* @str: the string to analyze
				947	*
				948	* Parse an URI reference string based on RFC 3986 and fills in the
				949	* appropriate fields of the @uri structure
				950	*
				951	* URI-reference = URI / relative-ref
				952	*
				953	* Returns 0 or the error code
				954	*/
				955	int
				956	uri_parse_into(URI uri, const char str) {
				957	return(rfc3986_parse_uri_reference(uri, str));
				958	}
				959
				960	/**
				961	* uri_parse_raw:
				962	* @str: the URI string to analyze
				963	* @raw: if 1 unescaping of URI pieces are disabled
				964	*
				965	* Parse an URI but allows to keep intact the original fragments.
				966	*
				967	* URI-reference = URI / relative-ref
				968	*
				969	* Returns a newly built URI or NULL in case of error
				970	*/
				971	URI *
				972	uri_parse_raw(const char *str, int raw) {
				973	URI *uri;
				974	int ret;
				975
				976	if (str == NULL)
				977	return(NULL);
				978	uri = uri_new();
				979	if (uri != NULL) {
				980	if (raw) {
				981	uri->cleanup \|= 2;
				982	}
				983	ret = uri_parse_into(uri, str);
				984	if (ret) {
				985	uri_free(uri);
				986	return(NULL);
				987	}
				988	}
				989	return(uri);
				990	}
				991
				992	/************************************************************************
				993	* *
				994	* Generic URI structure functions *
				995	* *
				996	************************************************************************/
				997
				998	/**
				999	* uri_new:
				1000	*
				1001	* Simply creates an empty URI
				1002	*
				1003	* Returns the new structure or NULL in case of error
				1004	*/
				1005	URI *
				1006	uri_new(void) {
				1007	URI *ret;
				1008
				1009	ret = (URI *) g_malloc(sizeof(URI));
				1010	memset(ret, 0, sizeof(URI));
				1011	return(ret);
				1012	}
				1013
				1014	/**
				1015	* realloc2n:
				1016	*
				1017	* Function to handle properly a reallocation when saving an URI
				1018	* Also imposes some limit on the length of an URI string output
				1019	*/
				1020	static char *
				1021	realloc2n(char ret, int max) {
				1022	char *temp;
				1023	int tmp;
				1024
				1025	tmp = max 2;
				1026	temp = g_realloc(ret, (tmp + 1));
				1027	*max = tmp;
				1028	return(temp);
				1029	}
				1030
				1031	/**
				1032	* uri_to_string:
				1033	* @uri: pointer to an URI
				1034	*
				1035	* Save the URI as an escaped string
				1036	*
				1037	* Returns a new string (to be deallocated by caller)
				1038	*/
				1039	char *
				1040	uri_to_string(URI *uri) {
				1041	char *ret = NULL;
				1042	char *temp;
				1043	const char *p;
				1044	int len;
				1045	int max;
				1046
				1047	if (uri == NULL) return(NULL);
				1048
				1049
				1050	max = 80;
				1051	ret = g_malloc(max + 1);
				1052	len = 0;
				1053
				1054	if (uri->scheme != NULL) {
				1055	p = uri->scheme;
				1056	while (*p != 0) {
				1057	if (len >= max) {
				1058	temp = realloc2n(ret, &max);
				1059	if (temp == NULL) goto mem_error;
				1060	ret = temp;
				1061	}
				1062	ret[len++] = *p++;
				1063	}
				1064	if (len >= max) {
				1065	temp = realloc2n(ret, &max);
				1066	if (temp == NULL) goto mem_error;
				1067	ret = temp;
				1068	}
				1069	ret[len++] = ':';
				1070	}
				1071	if (uri->opaque != NULL) {
				1072	p = uri->opaque;
				1073	while (*p != 0) {
				1074	if (len + 3 >= max) {
				1075	temp = realloc2n(ret, &max);
				1076	if (temp == NULL) goto mem_error;
				1077	ret = temp;
				1078	}
				1079	if (IS_RESERVED((p)) \|\| IS_UNRESERVED((p)))
				1080	ret[len++] = *p++;
				1081	else {
				1082	int val = (unsigned char )p++;
				1083	int hi = val / 0x10, lo = val % 0x10;
				1084	ret[len++] = '%';
				1085	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				1086	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				1087	}
				1088	}
				1089	} else {
				1090	if (uri->server != NULL) {
				1091	if (len + 3 >= max) {
				1092	temp = realloc2n(ret, &max);
				1093	if (temp == NULL) goto mem_error;
				1094	ret = temp;
				1095	}
				1096	ret[len++] = '/';
				1097	ret[len++] = '/';
				1098	if (uri->user != NULL) {
				1099	p = uri->user;
				1100	while (*p != 0) {
				1101	if (len + 3 >= max) {
				1102	temp = realloc2n(ret, &max);
				1103	if (temp == NULL) goto mem_error;
				1104	ret = temp;
				1105	}
				1106	if ((IS_UNRESERVED(*(p))) \|\|
				1107	(((p) == ';')) \|\| (((p) == ':')) \|\|
				1108	(((p) == '&')) \|\| (((p) == '=')) \|\|
				1109	(((p) == '+')) \|\| (((p) == '$')) \|\|
				1110	((*(p) == ',')))
				1111	ret[len++] = *p++;
				1112	else {
				1113	int val = (unsigned char )p++;
				1114	int hi = val / 0x10, lo = val % 0x10;
				1115	ret[len++] = '%';
				1116	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				1117	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				1118	}
				1119	}
				1120	if (len + 3 >= max) {
				1121	temp = realloc2n(ret, &max);
				1122	if (temp == NULL) goto mem_error;
				1123	ret = temp;
				1124	}
				1125	ret[len++] = '@';
				1126	}
				1127	p = uri->server;
				1128	while (*p != 0) {
				1129	if (len >= max) {
				1130	temp = realloc2n(ret, &max);
				1131	if (temp == NULL) goto mem_error;
				1132	ret = temp;
				1133	}
				1134	ret[len++] = *p++;
				1135	}
				1136	if (uri->port > 0) {
				1137	if (len + 10 >= max) {
				1138	temp = realloc2n(ret, &max);
				1139	if (temp == NULL) goto mem_error;
				1140	ret = temp;
				1141	}
				1142	len += snprintf(&ret[len], max - len, ":%d", uri->port);
				1143	}
				1144	} else if (uri->authority != NULL) {
				1145	if (len + 3 >= max) {
				1146	temp = realloc2n(ret, &max);
				1147	if (temp == NULL) goto mem_error;
				1148	ret = temp;
				1149	}
				1150	ret[len++] = '/';
				1151	ret[len++] = '/';
				1152	p = uri->authority;
				1153	while (*p != 0) {
				1154	if (len + 3 >= max) {
				1155	temp = realloc2n(ret, &max);
				1156	if (temp == NULL) goto mem_error;
				1157	ret = temp;
				1158	}
				1159	if ((IS_UNRESERVED(*(p))) \|\|
				1160	(((p) == '$')) \|\| (((p) == ',')) \|\| ((*(p) == ';')) \|\|
				1161	(((p) == ':')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
				1162	(((p) == '=')) \|\| (((p) == '+')))
				1163	ret[len++] = *p++;
				1164	else {
				1165	int val = (unsigned char )p++;
				1166	int hi = val / 0x10, lo = val % 0x10;
				1167	ret[len++] = '%';
				1168	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				1169	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				1170	}
				1171	}
				1172	} else if (uri->scheme != NULL) {
				1173	if (len + 3 >= max) {
				1174	temp = realloc2n(ret, &max);
				1175	if (temp == NULL) goto mem_error;
				1176	ret = temp;
				1177	}
				1178	ret[len++] = '/';
				1179	ret[len++] = '/';
				1180	}
				1181	if (uri->path != NULL) {
				1182	p = uri->path;
				1183	/*
				1184	* the colon in file:///d: should not be escaped or
				1185	* Windows accesses fail later.
				1186	*/
				1187	if ((uri->scheme != NULL) &&
				1188	(p[0] == '/') &&
				1189	(((p[1] >= 'a') && (p[1] <= 'z')) \|\|
				1190	((p[1] >= 'A') && (p[1] <= 'Z'))) &&
				1191	(p[2] == ':') &&
				1192	(!strcmp(uri->scheme, "file"))) {
				1193	if (len + 3 >= max) {
				1194	temp = realloc2n(ret, &max);
				1195	if (temp == NULL) goto mem_error;
				1196	ret = temp;
				1197	}
				1198	ret[len++] = *p++;
				1199	ret[len++] = *p++;
				1200	ret[len++] = *p++;
				1201	}
				1202	while (*p != 0) {
				1203	if (len + 3 >= max) {
				1204	temp = realloc2n(ret, &max);
				1205	if (temp == NULL) goto mem_error;
				1206	ret = temp;
				1207	}
				1208	if ((IS_UNRESERVED((p))) \|\| (((p) == '/')) \|\|
				1209	(((p) == ';')) \|\| (((p) == '@')) \|\| ((*(p) == '&')) \|\|
				1210	(((p) == '=')) \|\| (((p) == '+')) \|\| ((*(p) == '$')) \|\|
				1211	((*(p) == ',')))
				1212	ret[len++] = *p++;
				1213	else {
				1214	int val = (unsigned char )p++;
				1215	int hi = val / 0x10, lo = val % 0x10;
				1216	ret[len++] = '%';
				1217	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				1218	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				1219	}
				1220	}
				1221	}
				1222	if (uri->query != NULL) {
				1223	if (len + 1 >= max) {
				1224	temp = realloc2n(ret, &max);
				1225	if (temp == NULL) goto mem_error;
				1226	ret = temp;
				1227	}
				1228	ret[len++] = '?';
				1229	p = uri->query;
				1230	while (*p != 0) {
				1231	if (len + 1 >= max) {
				1232	temp = realloc2n(ret, &max);
				1233	if (temp == NULL) goto mem_error;
				1234	ret = temp;
				1235	}
				1236	ret[len++] = *p++;
				1237	}
				1238	}
				1239	}
				1240	if (uri->fragment != NULL) {
				1241	if (len + 3 >= max) {
				1242	temp = realloc2n(ret, &max);
				1243	if (temp == NULL) goto mem_error;
				1244	ret = temp;
				1245	}
				1246	ret[len++] = '#';
				1247	p = uri->fragment;
				1248	while (*p != 0) {
				1249	if (len + 3 >= max) {
				1250	temp = realloc2n(ret, &max);
				1251	if (temp == NULL) goto mem_error;
				1252	ret = temp;
				1253	}
				1254	if ((IS_UNRESERVED((p))) \|\| (IS_RESERVED((p))))
				1255	ret[len++] = *p++;
				1256	else {
				1257	int val = (unsigned char )p++;
				1258	int hi = val / 0x10, lo = val % 0x10;
				1259	ret[len++] = '%';
				1260	ret[len++] = hi + (hi > 9? 'A'-10 : '0');
				1261	ret[len++] = lo + (lo > 9? 'A'-10 : '0');
				1262	}
				1263	}
				1264	}
				1265	if (len >= max) {
				1266	temp = realloc2n(ret, &max);
				1267	if (temp == NULL) goto mem_error;
				1268	ret = temp;
				1269	}
				1270	ret[len] = 0;
				1271	return(ret);
				1272
				1273	mem_error:
				1274	g_free(ret);
				1275	return(NULL);
				1276	}
				1277
				1278	/**
				1279	* uri_clean:
				1280	* @uri: pointer to an URI
				1281	*
				1282	* Make sure the URI struct is free of content
				1283	*/
				1284	static void
				1285	uri_clean(URI *uri) {
				1286	if (uri == NULL) return;
				1287
				1288	if (uri->scheme != NULL) g_free(uri->scheme);
				1289	uri->scheme = NULL;
				1290	if (uri->server != NULL) g_free(uri->server);
				1291	uri->server = NULL;
				1292	if (uri->user != NULL) g_free(uri->user);
				1293	uri->user = NULL;
				1294	if (uri->path != NULL) g_free(uri->path);
				1295	uri->path = NULL;
				1296	if (uri->fragment != NULL) g_free(uri->fragment);
				1297	uri->fragment = NULL;
				1298	if (uri->opaque != NULL) g_free(uri->opaque);
				1299	uri->opaque = NULL;
				1300	if (uri->authority != NULL) g_free(uri->authority);
				1301	uri->authority = NULL;
				1302	if (uri->query != NULL) g_free(uri->query);
				1303	uri->query = NULL;
				1304	}
				1305
				1306	/**
				1307	* uri_free:
				1308	* @uri: pointer to an URI
				1309	*
				1310	* Free up the URI struct
				1311	*/
				1312	void
				1313	uri_free(URI *uri) {
				1314	uri_clean(uri);
				1315	g_free(uri);
				1316	}
				1317
				1318	/************************************************************************
				1319	* *
				1320	* Helper functions *
				1321	* *
				1322	************************************************************************/
				1323
				/**
				 * normalize_uri_path:
				 * @path: pointer to the path string
				 *
				 * Applies the 5 normalization steps to a path string--that is, RFC 2396
				 * Section 5.2, steps 6.c through 6.g.  Runs of consecutive '/' inside the
				 * path are collapsed to a single '/' along the way.
				 *
				 * Normalization occurs directly on the string, no new allocation is done;
				 * the result is never longer than the input.
				 *
				 * Returns 0 on success, or -1 if @path is NULL
				 */
				static int
				normalize_uri_path(char *path)
				{
				    char *cur, *out;

				    if (path == NULL) {
				        return -1;
				    }

				    /* Skip all initial "/" chars.  We want to get to the beginning of the
				     * first non-empty segment.
				     */
				    cur = path;
				    while (cur[0] == '/') {
				        ++cur;
				    }
				    if (cur[0] == '\0') {
				        return 0;
				    }

				    /* Keep everything we've seen so far. */
				    out = cur;

				    /*
				     * Analyze each segment in sequence for cases (c) and (d).
				     */
				    while (cur[0] != '\0') {
				        /*
				         * c) All occurrences of "./", where "." is a complete path segment,
				         *    are removed from the buffer string.
				         */
				        if ((cur[0] == '.') && (cur[1] == '/')) {
				            cur += 2;
				            /* '//' normalization should be done at this point too */
				            while (cur[0] == '/') {
				                cur++;
				            }
				            continue;
				        }

				        /*
				         * d) If the buffer string ends with "." as a complete path segment,
				         *    that "." is removed.
				         */
				        if ((cur[0] == '.') && (cur[1] == '\0')) {
				            break;
				        }

				        /* Otherwise keep the segment. */
				        while (cur[0] != '/') {
				            if (cur[0] == '\0') {
				                goto done_cd;
				            }
				            (out++)[0] = (cur++)[0];
				        }
				        /* normalize // : skip all but the last slash of a run */
				        while ((cur[0] == '/') && (cur[1] == '/')) {
				            cur++;
				        }

				        /* copy the single remaining separator */
				        (out++)[0] = (cur++)[0];
				    }
				done_cd:
				    out[0] = '\0';

				    /* Reset to the beginning of the first segment for the next sequence. */
				    cur = path;
				    while (cur[0] == '/') {
				        ++cur;
				    }
				    if (cur[0] == '\0') {
				        return 0;
				    }

				    /*
				     * Analyze each segment in sequence for cases (e) and (f).
				     *
				     * e) All occurrences of "<segment>/../", where <segment> is a
				     *    complete path segment not equal to "..", are removed from the
				     *    buffer string.  Removal of these path segments is performed
				     *    iteratively, removing the leftmost matching pattern on each
				     *    iteration, until no matching pattern remains.
				     *
				     * f) If the buffer string ends with "<segment>/..", where <segment>
				     *    is a complete path segment not equal to "..", that
				     *    "<segment>/.." is removed.
				     *
				     * To satisfy the "iterative" clause in (e), we need to collapse the
				     * string every time we find something that needs to be removed.  Thus,
				     * we don't need to keep two pointers into the string: we only need a
				     * "current position" pointer.
				     */
				    while (1) {
				        char *segp, *tmp;

				        /* At the beginning of each iteration of this loop, "cur" points to
				         * the first character of the segment we want to examine.
				         */

				        /* Find the end of the current segment.  */
				        segp = cur;
				        while ((segp[0] != '/') && (segp[0] != '\0')) {
				            ++segp;
				        }

				        /* If this is the last segment, we're done (we need at least two
				         * segments to meet the criteria for the (e) and (f) cases).
				         */
				        if (segp[0] == '\0') {
				            break;
				        }

				        /* If the first segment is "..", or if the next segment _isn't_ "..",
				         * keep this segment and try the next one.
				         */
				        ++segp;
				        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3))
				            || ((segp[0] != '.') || (segp[1] != '.')
				                || ((segp[2] != '/') && (segp[2] != '\0')))) {
				            cur = segp;
				            continue;
				        }

				        /* If we get here, remove this segment and the next one and back up
				         * to the previous segment (if there is one), to implement the
				         * "iteratively" clause.  It's pretty much impossible to back up
				         * while maintaining two pointers into the buffer, so just compact
				         * the whole buffer now.
				         */

				        /* If this is the end of the buffer, we're done. */
				        if (segp[2] == '\0') {
				            cur[0] = '\0';
				            break;
				        }
				        /* Source and destination overlap, so strcpy() must not be used:
				         * move the tail down by hand, terminator included.
				         */
				        tmp = cur;
				        segp += 3;
				        while ((*tmp++ = *segp++) != 0) {
				            ;
				        }

				        /* If there are no previous segments, then keep going from here.  */
				        segp = cur;
				        while ((segp > path) && ((--segp)[0] == '/')) {
				            ;
				        }
				        if (segp == path) {
				            continue;
				        }

				        /* "segp" is pointing to the end of a previous segment; find its
				         * start.  We need to back up to the previous segment and start
				         * over with that to handle things like "foo/bar/../..".  If we
				         * don't do this, then on the first pass we'll remove the "bar/..",
				         * but be pointing at the second ".." so we won't realize we can also
				         * remove the "foo/..".
				         */
				        cur = segp;
				        while ((cur > path) && (cur[-1] != '/')) {
				            --cur;
				        }
				    }
				    /* "out" still marks the end reached by the (c)/(d) pass, which is at
				     * or beyond the current terminator; this store is therefore harmless.
				     */
				    out[0] = '\0';

				    /*
				     * g) If the resulting buffer string still begins with one or more
				     *    complete path segments of "..", then the reference is
				     *    considered to be in error. Implementations may handle this
				     *    error by retaining these components in the resolved path (i.e.,
				     *    treating them as part of the final URI), by removing them from
				     *    the resolved path (i.e., discarding relative levels above the
				     *    root), or by avoiding traversal of the reference.
				     *
				     * We discard them from the final path.
				     */
				    if (path[0] == '/') {
				        cur = path;
				        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
				               && ((cur[3] == '/') || (cur[3] == '\0'))) {
				            cur += 3;
				        }

				        if (cur != path) {
				            out = path;
				            while (cur[0] != '\0') {
				                (out++)[0] = (cur++)[0];
				            }
				            out[0] = 0;
				        }
				    }

				    return 0;
				}
				1512
				/* Return 1 if @c is one of 0-9, a-f or A-F, and 0 otherwise. */
				static int is_hex(char c)
				{
				    int is_digit = (c >= '0') && (c <= '9');
				    int is_lower = (c >= 'a') && (c <= 'f');
				    int is_upper = (c >= 'A') && (c <= 'F');

				    return (is_digit || is_lower || is_upper) ? 1 : 0;
				}
				1520
				1521
				/*
				 * Numeric value of the hexadecimal digit @c.  The caller must have
				 * checked @c with is_hex() beforehand.
				 */
				static int
				uri_hex_digit_value(char c)
				{
				    if ((c >= '0') && (c <= '9')) {
				        return c - '0';
				    }
				    if ((c >= 'a') && (c <= 'f')) {
				        return (c - 'a') + 10;
				    }
				    return (c - 'A') + 10;
				}

				/**
				 * uri_string_unescape:
				 * @str: the string to unescape
				 * @len: the length in bytes to unescape (or <= 0 to indicate full string)
				 * @target: optional destination buffer
				 *
				 * Unescaping routine, but does not check that the string is an URI. The
				 * output is a direct unsigned char translation of %XX values (no encoding).
				 * A '%' that is not followed by two hexadecimal digits within the @len
				 * bytes is copied unchanged.
				 * Note that the length of the result can only be smaller or same size as
				 * the input string, so a @target buffer needs room for @len + 1 bytes.
				 *
				 * Returns the unescaped, NUL-terminated string: @target when it is given,
				 * otherwise a newly allocated copy.  NULL is returned only in case of
				 * error (@str is NULL or its length does not fit in an int).
				 */
				char *
				uri_string_unescape(const char *str, int len, char *target)
				{
				    char *ret, *out;
				    const char *in;

				    if (str == NULL) {
				        return NULL;
				    }
				    if (len <= 0) {
				        len = strlen(str);
				    }
				    if (len < 0) {
				        return NULL;
				    }

				    if (target == NULL) {
				        ret = g_malloc(len + 1);
				    } else {
				        ret = target;
				    }
				    in = str;
				    out = ret;
				    while (len > 0) {
				        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
				            /* "%XY" -> one byte of value 0xXY */
				            *out++ = (char)(uri_hex_digit_value(in[1]) * 16 +
				                            uri_hex_digit_value(in[2]));
				            in += 3;
				            len -= 3;
				        } else {
				            *out++ = *in++;
				            len--;
				        }
				    }
				    *out = 0;
				    return ret;
				}
				1579
				/**
				 * uri_string_escape:
				 * @str: string to escape
				 * @list: exception list string of chars not to escape (NULL for none)
				 *
				 * This routine escapes a string to hex ("%XX", upper-case digits),
				 * leaving untouched the unreserved characters, '@', and the characters
				 * in the exception list.
				 *
				 * Returns a new escaped string (to be freed by the caller) or NULL in
				 * case of error.
				 */
				char *
				uri_string_escape(const char *str, const char *list)
				{
				    static const char hex[] = "0123456789ABCDEF";
				    char *ret;
				    const char *in;
				    int len, out;

				    if (str == NULL) {
				        return NULL;
				    }
				    if (str[0] == 0) {
				        return g_strdup(str);
				    }
				    len = strlen(str);
				    if (!(len > 0)) {
				        return NULL;
				    }

				    len += 20;
				    ret = g_malloc(len);
				    out = 0;
				    for (in = str; *in != 0; in++) {
				        /*
				         * Work on the byte as unsigned: with a plain (possibly signed)
				         * char, ">> 4" on values >= 0x80 would yield a bogus high nibble.
				         */
				        unsigned char ch = *in;

				        /* keep room for one "%XX" triplet plus the terminator */
				        if (len - out <= 3) {
				            ret = realloc2n(ret, &len);
				        }

				        if ((ch != '@') && (!IS_UNRESERVED(ch)) &&
				            !((list != NULL) && (strchr(list, ch) != NULL))) {
				            ret[out++] = '%';
				            ret[out++] = hex[ch >> 4];
				            ret[out++] = hex[ch & 0xF];
				        } else {
				            ret[out++] = *in;
				        }
				    }
				    ret[out] = 0;
				    return ret;
				}
				1638
				1639	/************************************************************************
				1640	* *
				1641	* Public functions *
				1642	* *
				1643	************************************************************************/
				1644
				1645	/**
				1646	* uri_resolve:
				1647	* @URI: the URI instance found in the document
				1648	* @base: the base value
				1649	*
				1650	* Computes he final URI of the reference done by checking that
				1651	* the given URI is valid, and building the final URI using the
				1652	* base URI. This is processed according to section 5.2 of the
				1653	* RFC 2396
				1654	*
				1655	* 5.2. Resolving Relative References to Absolute Form
				1656	*
				1657	* Returns a new URI string (to be freed by the caller) or NULL in case
				1658	* of error.
				1659	*/
				1660	char *
				1661	uri_resolve(const char uri, const char base) {
				1662	char *val = NULL;
				1663	int ret, len, indx, cur, out;
				1664	URI *ref = NULL;
				1665	URI *bas = NULL;
				1666	URI *res = NULL;
				1667
				1668	/*
				1669	* 1) The URI reference is parsed into the potential four components and
				1670	* fragment identifier, as described in Section 4.3.
				1671	*
				1672	* NOTE that a completely empty URI is treated by modern browsers
				1673	* as a reference to "." rather than as a synonym for the current
				1674	* URI. Should we do that here?
				1675	*/
				1676	if (uri == NULL)
				1677	ret = -1;
				1678	else {
				1679	if (*uri) {
				1680	ref = uri_new();
				1681	if (ref == NULL)
				1682	goto done;
				1683	ret = uri_parse_into(ref, uri);
				1684	}
				1685	else
				1686	ret = 0;
				1687	}
				1688	if (ret != 0)
				1689	goto done;
				1690	if ((ref != NULL) && (ref->scheme != NULL)) {
				1691	/*
				1692	* The URI is absolute don't modify.
				1693	*/
				1694	val = g_strdup(uri);
				1695	goto done;
				1696	}
				1697	if (base == NULL)
				1698	ret = -1;
				1699	else {
				1700	bas = uri_new();
				1701	if (bas == NULL)
				1702	goto done;
				1703	ret = uri_parse_into(bas, base);
				1704	}
				1705	if (ret != 0) {
				1706	if (ref)
				1707	val = uri_to_string(ref);
				1708	goto done;
				1709	}
				1710	if (ref == NULL) {
				1711	/*
				1712	* the base fragment must be ignored
				1713	*/
				1714	if (bas->fragment != NULL) {
				1715	g_free(bas->fragment);
				1716	bas->fragment = NULL;
				1717	}
				1718	val = uri_to_string(bas);
				1719	goto done;
				1720	}
				1721
				1722	/*
				1723	* 2) If the path component is empty and the scheme, authority, and
				1724	* query components are undefined, then it is a reference to the
				1725	* current document and we are done. Otherwise, the reference URI's
				1726	* query and fragment components are defined as found (or not found)
				1727	* within the URI reference and not inherited from the base URI.
				1728	*
				1729	* NOTE that in modern browsers, the parsing differs from the above
				1730	* in the following aspect: the query component is allowed to be
				1731	* defined while still treating this as a reference to the current
				1732	* document.
				1733	*/
				1734	res = uri_new();
				1735	if (res == NULL)
				1736	goto done;
				1737	if ((ref->scheme == NULL) && (ref->path == NULL) &&
				1738	((ref->authority == NULL) && (ref->server == NULL))) {
				1739	if (bas->scheme != NULL)
				1740	res->scheme = g_strdup(bas->scheme);
				1741	if (bas->authority != NULL)
				1742	res->authority = g_strdup(bas->authority);
				1743	else if (bas->server != NULL) {
				1744	res->server = g_strdup(bas->server);
				1745	if (bas->user != NULL)
				1746	res->user = g_strdup(bas->user);
				1747	res->port = bas->port;
				1748	}
				1749	if (bas->path != NULL)
				1750	res->path = g_strdup(bas->path);
				1751	if (ref->query != NULL)
				1752	res->query = g_strdup (ref->query);
				1753	else if (bas->query != NULL)
				1754	res->query = g_strdup(bas->query);
				1755	if (ref->fragment != NULL)
				1756	res->fragment = g_strdup(ref->fragment);
				1757	goto step_7;
				1758	}
				1759
				1760	/*
				1761	* 3) If the scheme component is defined, indicating that the reference
				1762	* starts with a scheme name, then the reference is interpreted as an
				1763	* absolute URI and we are done. Otherwise, the reference URI's
				1764	* scheme is inherited from the base URI's scheme component.
				1765	*/
				1766	if (ref->scheme != NULL) {
				1767	val = uri_to_string(ref);
				1768	goto done;
				1769	}
				1770	if (bas->scheme != NULL)
				1771	res->scheme = g_strdup(bas->scheme);
				1772
				1773	if (ref->query != NULL)
				1774	res->query = g_strdup(ref->query);
				1775	if (ref->fragment != NULL)
				1776	res->fragment = g_strdup(ref->fragment);
				1777
				1778	/*
				1779	* 4) If the authority component is defined, then the reference is a
				1780	* network-path and we skip to step 7. Otherwise, the reference
				1781	* URI's authority is inherited from the base URI's authority
				1782	* component, which will also be undefined if the URI scheme does not
				1783	* use an authority component.
				1784	*/
				1785	if ((ref->authority != NULL) \|\| (ref->server != NULL)) {
				1786	if (ref->authority != NULL)
				1787	res->authority = g_strdup(ref->authority);
				1788	else {
				1789	res->server = g_strdup(ref->server);
				1790	if (ref->user != NULL)
				1791	res->user = g_strdup(ref->user);
				1792	res->port = ref->port;
				1793	}
				1794	if (ref->path != NULL)
				1795	res->path = g_strdup(ref->path);
				1796	goto step_7;
				1797	}
				1798	if (bas->authority != NULL)
				1799	res->authority = g_strdup(bas->authority);
				1800	else if (bas->server != NULL) {
				1801	res->server = g_strdup(bas->server);
				1802	if (bas->user != NULL)
				1803	res->user = g_strdup(bas->user);
				1804	res->port = bas->port;
				1805	}
				1806
				1807	/*
				1808	* 5) If the path component begins with a slash character ("/"), then
				1809	* the reference is an absolute-path and we skip to step 7.
				1810	*/
				1811	if ((ref->path != NULL) && (ref->path[0] == '/')) {
				1812	res->path = g_strdup(ref->path);
				1813	goto step_7;
				1814	}
				1815
				1816
				1817	/*
				1818	* 6) If this step is reached, then we are resolving a relative-path
				1819	* reference. The relative path needs to be merged with the base
				1820	* URI's path. Although there are many ways to do this, we will
				1821	* describe a simple method using a separate string buffer.
				1822	*
				1823	* Allocate a buffer large enough for the result string.
				1824	*/
				1825	len = 2; /* extra / and 0 */
				1826	if (ref->path != NULL)
				1827	len += strlen(ref->path);
				1828	if (bas->path != NULL)
				1829	len += strlen(bas->path);
				1830	res->path = g_malloc(len);
				1831	res->path[0] = 0;
				1832
				1833	/*
				1834	* a) All but the last segment of the base URI's path component is
				1835	* copied to the buffer. In other words, any characters after the
				1836	* last (right-most) slash character, if any, are excluded.
				1837	*/
				1838	cur = 0;
				1839	out = 0;
				1840	if (bas->path != NULL) {
				1841	while (bas->path[cur] != 0) {
				1842	while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
				1843	cur++;
				1844	if (bas->path[cur] == 0)
				1845	break;
				1846
				1847	cur++;
				1848	while (out < cur) {
				1849	res->path[out] = bas->path[out];
				1850	out++;
				1851	}
				1852	}
				1853	}
				1854	res->path[out] = 0;
				1855
				1856	/*
				1857	* b) The reference's path component is appended to the buffer
				1858	* string.
				1859	*/
				1860	if (ref->path != NULL && ref->path[0] != 0) {
				1861	indx = 0;
				1862	/*
				1863	* Ensure the path includes a '/'
				1864	*/
				1865	if ((out == 0) && (bas->server != NULL))
				1866	res->path[out++] = '/';
				1867	while (ref->path[indx] != 0) {
				1868	res->path[out++] = ref->path[indx++];
				1869	}
				1870	}
				1871	res->path[out] = 0;
				1872
				1873	/*
				1874	* Steps c) to h) are really path normalization steps
				1875	*/
				1876	normalize_uri_path(res->path);
				1877
				1878	step_7:
				1879
				1880	/*
				1881	* 7) The resulting URI components, including any inherited from the
				1882	* base URI, are recombined to give the absolute form of the URI
				1883	* reference.
				1884	*/
				1885	val = uri_to_string(res);
				1886
				1887	done:
				1888	if (ref != NULL)
				1889	uri_free(ref);
				1890	if (bas != NULL)
				1891	uri_free(bas);
				1892	if (res != NULL)
				1893	uri_free(res);
				1894	return(val);
				1895	}
				1896
				1897	/**
				1898	* uri_resolve_relative:
				1899	* @URI: the URI reference under consideration
				1900	* @base: the base value
				1901	*
				1902	* Expresses the URI of the reference in terms relative to the
				1903	* base. Some examples of this operation include:
				1904	* base = "http://site1.com/docs/book1.html"
				1905	* URI input URI returned
				1906	* docs/pic1.gif pic1.gif
				1907	* docs/img/pic1.gif img/pic1.gif
				1908	* img/pic1.gif ../img/pic1.gif
				1909	* http://site1.com/docs/pic1.gif pic1.gif
				1910	* http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
				1911	*
				1912	* base = "docs/book1.html"
				1913	* URI input URI returned
				1914	* docs/pic1.gif pic1.gif
				1915	* docs/img/pic1.gif img/pic1.gif
				1916	* img/pic1.gif ../img/pic1.gif
				1917	* http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
				1918	*
				1919	*
Stefan Weil	a93cf9d	2012-11-02 08:29:53 +0100	[diff] [blame]	1920	* Note: if the URI reference is really weird or complicated, it may be
Paolo Bonzini	ca0defb	2012-09-24 14:42:02 +0530	[diff] [blame]	1921	* worthwhile to first convert it into a "nice" one by calling
				1922	* uri_resolve (using 'base') before calling this routine,
				1923	* since this routine (for reasonable efficiency) assumes URI has
				1924	* already been through some validation.
				1925	*
				1926	* Returns a new URI string (to be freed by the caller) or NULL in case
				1927	* error.
				1928	*/
				1929	char *
				1930	uri_resolve_relative (const char uri, const char base)
				1931	{
				1932	char *val = NULL;
				1933	int ret;
				1934	int ix;
				1935	int pos = 0;
				1936	int nbslash = 0;
				1937	int len;
				1938	URI *ref = NULL;
				1939	URI *bas = NULL;
				1940	char bptr, uptr, *vptr;
				1941	int remove_path = 0;
				1942
				1943	if ((uri == NULL) \|\| (*uri == 0))
				1944	return NULL;
				1945
				1946	/*
				1947	* First parse URI into a standard form
				1948	*/
				1949	ref = uri_new ();
				1950	if (ref == NULL)
				1951	return NULL;
				1952	/* If URI not already in "relative" form */
				1953	if (uri[0] != '.') {
				1954	ret = uri_parse_into (ref, uri);
				1955	if (ret != 0)
				1956	goto done; /* Error in URI, return NULL */
				1957	} else
				1958	ref->path = g_strdup(uri);
				1959
				1960	/*
				1961	* Next parse base into the same standard form
				1962	*/
				1963	if ((base == NULL) \|\| (*base == 0)) {
				1964	val = g_strdup (uri);
				1965	goto done;
				1966	}
				1967	bas = uri_new ();
				1968	if (bas == NULL)
				1969	goto done;
				1970	if (base[0] != '.') {
				1971	ret = uri_parse_into (bas, base);
				1972	if (ret != 0)
				1973	goto done; /* Error in base, return NULL */
				1974	} else
				1975	bas->path = g_strdup(base);
				1976
				1977	/*
				1978	* If the scheme / server on the URI differs from the base,
				1979	* just return the URI
				1980	*/
				1981	if ((ref->scheme != NULL) &&
				1982	((bas->scheme == NULL) \|\|
				1983	(strcmp (bas->scheme, ref->scheme)) \|\|
				1984	(strcmp (bas->server, ref->server)))) {
				1985	val = g_strdup (uri);
				1986	goto done;
				1987	}
				1988	if (!strcmp(bas->path, ref->path)) {
				1989	val = g_strdup("");
				1990	goto done;
				1991	}
				1992	if (bas->path == NULL) {
				1993	val = g_strdup(ref->path);
				1994	goto done;
				1995	}
				1996	if (ref->path == NULL) {
				1997	ref->path = (char *) "/";
				1998	remove_path = 1;
				1999	}
				2000
				2001	/*
				2002	* At this point (at last!) we can compare the two paths
				2003	*
				2004	* First we take care of the special case where either of the
				2005	* two path components may be missing (bug 316224)
				2006	*/
				2007	if (bas->path == NULL) {
				2008	if (ref->path != NULL) {
				2009	uptr = ref->path;
				2010	if (*uptr == '/')
				2011	uptr++;
				2012	/* exception characters from uri_to_string */
				2013	val = uri_string_escape(uptr, "/;&=+$,");
				2014	}
				2015	goto done;
				2016	}
				2017	bptr = bas->path;
				2018	if (ref->path == NULL) {
				2019	for (ix = 0; bptr[ix] != 0; ix++) {
				2020	if (bptr[ix] == '/')
				2021	nbslash++;
				2022	}
				2023	uptr = NULL;
				2024	len = 1; /* this is for a string terminator only */
				2025	} else {
				2026	/*
				2027	* Next we compare the two strings and find where they first differ
				2028	*/
				2029	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
				2030	pos += 2;
				2031	if ((*bptr == '.') && (bptr[1] == '/'))
				2032	bptr += 2;
				2033	else if ((*bptr == '/') && (ref->path[pos] != '/'))
				2034	bptr++;
				2035	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
				2036	pos++;
				2037
				2038	if (bptr[pos] == ref->path[pos]) {
				2039	val = g_strdup("");
				2040	goto done; /* (I can't imagine why anyone would do this) */
				2041	}
				2042
				2043	/*
				2044	* In URI, "back up" to the last '/' encountered. This will be the
				2045	* beginning of the "unique" suffix of URI
				2046	*/
				2047	ix = pos;
				2048	if ((ref->path[ix] == '/') && (ix > 0))
				2049	ix--;
				2050	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
				2051	ix -= 2;
				2052	for (; ix > 0; ix--) {
				2053	if (ref->path[ix] == '/')
				2054	break;
				2055	}
				2056	if (ix == 0) {
				2057	uptr = ref->path;
				2058	} else {
				2059	ix++;
				2060	uptr = &ref->path[ix];
				2061	}
				2062
				2063	/*
				2064	* In base, count the number of '/' from the differing point
				2065	*/
				2066	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
				2067	for (; bptr[ix] != 0; ix++) {
				2068	if (bptr[ix] == '/')
				2069	nbslash++;
				2070	}
				2071	}
				2072	len = strlen (uptr) + 1;
				2073	}
				2074
				2075	if (nbslash == 0) {
				2076	if (uptr != NULL)
				2077	/* exception characters from uri_to_string */
				2078	val = uri_string_escape(uptr, "/;&=+$,");
				2079	goto done;
				2080	}
				2081
				2082	/*
				2083	* Allocate just enough space for the returned string -
				2084	* length of the remainder of the URI, plus enough space
				2085	* for the "../" groups, plus one for the terminator
				2086	*/
				2087	val = g_malloc (len + 3 * nbslash);
				2088	vptr = val;
				2089	/*
				2090	* Put in as many "../" as needed
				2091	*/
				2092	for (; nbslash>0; nbslash--) {
				2093	*vptr++ = '.';
				2094	*vptr++ = '.';
				2095	*vptr++ = '/';
				2096	}
				2097	/*
				2098	* Finish up with the end of the URI
				2099	*/
				2100	if (uptr != NULL) {
				2101	if ((vptr > val) && (len > 0) &&
				2102	(uptr[0] == '/') && (vptr[-1] == '/')) {
				2103	memcpy (vptr, uptr + 1, len - 1);
				2104	vptr[len - 2] = 0;
				2105	} else {
				2106	memcpy (vptr, uptr, len);
				2107	vptr[len - 1] = 0;
				2108	}
				2109	} else {
				2110	vptr[len - 1] = 0;
				2111	}
				2112
				2113	/* escape the freshly-built path */
				2114	vptr = val;
				2115	/* exception characters from uri_to_string */
				2116	val = uri_string_escape(vptr, "/;&=+$,");
				2117	g_free(vptr);
				2118
				2119	done:
				2120	/*
				2121	* Free the working variables
				2122	*/
				2123	if (remove_path != 0)
				2124	ref->path = NULL;
				2125	if (ref != NULL)
				2126	uri_free (ref);
				2127	if (bas != NULL)
				2128	uri_free (bas);
				2129
				2130	return val;
				2131	}
				2132
				2133	/*
				2134	* Utility functions to help parse and assemble query strings.
				2135	*/
				2136
				2137	struct QueryParams *
				2138	query_params_new (int init_alloc)
				2139	{
				2140	struct QueryParams *ps;
				2141
				2142	if (init_alloc <= 0) init_alloc = 1;
				2143
				2144	ps = g_new(QueryParams, 1);
				2145	ps->n = 0;
				2146	ps->alloc = init_alloc;
				2147	ps->p = g_new(QueryParam, ps->alloc);
				2148
				2149	return ps;
				2150	}
				2151
				2152	/* Ensure there is space to store at least one more parameter
				2153	* at the end of the set.
				2154	*/
				2155	static int
				2156	query_params_append (struct QueryParams *ps,
				2157	const char name, const char value)
				2158	{
				2159	if (ps->n >= ps->alloc) {
				2160	ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
				2161	ps->alloc *= 2;
				2162	}
				2163
				2164	ps->p[ps->n].name = g_strdup(name);
Dong Xu Wang	7f303ad	2013-05-09 15:53:49 +0800	[diff] [blame]	2165	ps->p[ps->n].value = g_strdup(value);
Paolo Bonzini	ca0defb	2012-09-24 14:42:02 +0530	[diff] [blame]	2166	ps->p[ps->n].ignore = 0;
				2167	ps->n++;
				2168
				2169	return 0;
				2170	}
				2171
				2172	void
				2173	query_params_free (struct QueryParams *ps)
				2174	{
				2175	int i;
				2176
				2177	for (i = 0; i < ps->n; ++i) {
				2178	g_free (ps->p[i].name);
				2179	g_free (ps->p[i].value);
				2180	}
				2181	g_free (ps->p);
				2182	g_free (ps);
				2183	}
				2184
				2185	struct QueryParams *
				2186	query_params_parse (const char *query)
				2187	{
				2188	struct QueryParams *ps;
				2189	const char end, eq;
				2190
				2191	ps = query_params_new (0);
				2192	if (!query \|\| query[0] == '\0') return ps;
				2193
				2194	while (*query) {
				2195	char name = NULL, value = NULL;
				2196
				2197	/* Find the next separator, or end of the string. */
				2198	end = strchr (query, '&');
				2199	if (!end)
				2200	end = strchr (query, ';');
				2201	if (!end)
				2202	end = query + strlen (query);
				2203
				2204	/* Find the first '=' character between here and end. */
				2205	eq = strchr (query, '=');
				2206	if (eq && eq >= end) eq = NULL;
				2207
				2208	/* Empty section (eg. "&&"). */
				2209	if (end == query)
				2210	goto next;
				2211
				2212	/* If there is no '=' character, then we have just "name"
				2213	* and consistent with CGI.pm we assume value is "".
				2214	*/
				2215	else if (!eq) {
				2216	name = uri_string_unescape (query, end - query, NULL);
				2217	value = NULL;
				2218	}
				2219	/* Or if we have "name=" here (works around annoying
				2220	* problem when calling uri_string_unescape with len = 0).
				2221	*/
				2222	else if (eq+1 == end) {
				2223	name = uri_string_unescape (query, eq - query, NULL);
				2224	value = g_new0(char, 1);
				2225	}
				2226	/* If the '=' character is at the beginning then we have
				2227	* "=value" and consistent with CGI.pm we _ignore_ this.
				2228	*/
				2229	else if (query == eq)
				2230	goto next;
				2231
				2232	/* Otherwise it's "name=value". */
				2233	else {
				2234	name = uri_string_unescape (query, eq - query, NULL);
				2235	value = uri_string_unescape (eq+1, end - (eq+1), NULL);
				2236	}
				2237
				2238	/* Append to the parameter set. */
				2239	query_params_append (ps, name, value);
				2240	g_free(name);
				2241	g_free(value);
				2242
				2243	next:
				2244	query = end;
				2245	if (query) query ++; / skip '&' separator */
				2246	}
				2247
				2248	return ps;
				2249	}