1 /*
2  * $LynxId: HTParse.h,v 1.21 2010/09/24 22:45:23 tom Exp $
3  *				HTParse:  URL parsing in the WWW Library
4  *				HTPARSE
5  *
6  *  This module of the WWW library contains code to parse URLs and various
7  *  related things.
8  *  Implemented by HTParse.c .
9  */
10 #ifndef HTPARSE_H
11 #define HTPARSE_H
12 
13 #ifndef HTUTILS_H
14 #include <HTUtils.h>
15 #endif
16 
17 #ifdef __cplusplus
18 extern "C" {
19 #endif
20 /*
21  *  Flag bits which may be ORed together to form the 'wanted'
22  *  argument of HTParse(), selecting which parts of a name it returns.
23  */
24 #define PARSE_ACCESS            16
25 #define PARSE_HOST               8
26 #define PARSE_PATH               4
27 #define PARSE_ANCHOR             2
28 #define PARSE_PUNCTUATION        1
29 #define PARSE_ALL               31	/* all five bits above ORed together */
30 #define PARSE_ALL_WITHOUT_ANCHOR  (PARSE_ALL ^ PARSE_ANCHOR)	/* 29: ANCHOR bit cleared */
31 /*
32  *  Additional flag bits for finer detail on components already
33  *  covered by the above.  PARSE_PATH above does not refer strictly
34  *  to the path component in the sense of the URI specs, but rather
35  *  to the path combined with a possible query component. - kw
36  *  Note that these two values lie outside PARSE_ALL (31).
37  */
38 #define PARSE_STRICTPATH        32
39 #define PARSE_QUERY             64
40 /*
41  *  Valid mask values (presumably for the 'mask' argument of HTEscape()
42  *  and HTEscapeSP() - confirm).  The terms are the BNF names in the URL document.
43  */
44 #define URL_XALPHAS     UCH(1)
45 #define URL_XPALPHAS    UCH(2)
46 #define URL_PATH        UCH(4)
47 /*	Strip white space off a string.				HTStrip()
48  *	-------------------------------
49  *
50  * On exit,
51  *	Return value points to first non-white character, or to 0 if none.
52  *	All trailing white space is OVERWRITTEN with zero (s is modified).
53  */ extern char *HTStrip(char *s);
54 
55 /*
56  *	Parse a port number					HTParsePort()
57  *	-------------------
58  *
59  * On entry,
60  *	host            A pointer to hostname possibly followed by ":port"
61  *	portp           Where the parsed port number is to be stored
62  * On exit,
63  *	returns         A pointer to the ":" before the port
64  *	sets            the port number via the pointer portp.
65  */
66     extern char *HTParsePort(char *host, int *portp);
67 
68 /*	Parse a Name relative to another name.			HTParse()
69  *	--------------------------------------
70  *
71  *	This returns those parts of a name which are given (and requested),
72  *	substituting bits from the related name where necessary.
73  *
74  * On entry,
75  *	aName		A filename given
76  *	relatedName	A name relative to which aName is to be parsed
77  *	wanted		A mask of PARSE_* flag bits selecting the parts wanted
78  *
79  * On exit,
80  *	returns		A pointer to a malloc'd string which the caller MUST FREE
81  */
82     extern char *HTParse(const char *aName,
83 			 const char *relatedName,
84 			 int wanted);
85 
86 /*	HTParseAnchor(), fast HTParse() specialization
87  *	----------------------------------------------
88  *	The result points into aName itself, so it must not be freed separately.
89  * On exit,
90  *	returns		A pointer within input string (probably to its end '\0')
91  */
92     extern const char *HTParseAnchor(const char *aName);
93 
94 /*	Simplify a filename.				HTSimplify()
95  *	--------------------
96  *
97  *  A unix-style file is allowed to contain the sequence xxx/../ which may
98  *  be replaced by "" , and the sequence "/./" which may be replaced by "/".
99  *  Simplification helps us recognize duplicate filenames.
100  *  (Nothing is returned, so filename is presumably rewritten in place.)
101  *	Thus,	/etc/junk/../fred	becomes /etc/fred
102  *		/etc/junk/./fred	becomes	/etc/junk/fred
103  *
104  *      but we should NOT change
105  *		http://fred.xxx.edu/../..
106  *
107  *	or	../../albert.html
108  */
109     extern void HTSimplify(char *filename);
110 
111 /*	Make Relative Name.					HTRelative()
112  *	-------------------
113  *
114  * This function creates and returns a string which expresses one
115  * address relative to another.  Where there is no relation, an absolute
116  * address is returned.
117  *
118  *  On entry,
119  *	Both names must be absolute, fully qualified names of nodes
120  *	(no anchor bits)
121  *
122  *  On exit,
123  *	The return result points to a newly allocated name which, if
124  *	parsed by HTParse relative to relatedName, will yield aName.
125  *	The caller is responsible for freeing the resulting name later.
126  *
127  */
128     extern char *HTRelative(const char *aName,
129 			    const char *relatedName);
130 
131 /*		Escape undesirable characters using %		HTEscape()
132  *		-------------------------------------
133  *
134  *	This function takes a pointer to a string in which some
135  *	characters that may be unacceptable are still unescaped.
136  *	It returns a string which has these characters
137  *	represented by a '%' character followed by two hex digits.
138  *	(mask presumably takes the URL_* mask values - TODO confirm.)
139  *	Unlike HTUnEscape(), this routine returns a malloc'd string.
140  */
141     extern char *HTEscape(const char *str,
142 			  unsigned mask);
143 
144 /*		Escape unsafe characters using %		HTEscapeUnsafe()
145  *		--------------------------------
146  *
147  *	This function takes a pointer to a string in which some
148  *	characters that may be unsafe are still unescaped.
149  *	It returns a string which has these characters
150  *	represented by a '%' character followed by two hex digits.
151  *
152  *	Unlike HTUnEscape(), this routine returns a malloc'd string.
153  */
154     extern char *HTEscapeUnsafe(const char *str);
155 
156 /*	Escape undesirable characters using % but space to +.	HTEscapeSP()
157  *	-----------------------------------------------------
158  *
159  *	This function takes a pointer to a string in which some
160  *	characters that may be unacceptable are still unescaped.
161  *	It returns a string which has these characters
162  *	represented by a '%' character followed by two hex digits,
163  *	except that spaces are converted to '+' instead of %20.
164  *
165  *	Unlike HTUnEscape(), this routine returns a malloc'd string.
166  */
167     extern char *HTEscapeSP(const char *str,
168 			    unsigned mask);
169 
170 /*	Decode %xx escaped characters.				HTUnEscape()
171  *	------------------------------
172  *	(The return value is presumably str itself - confirm in HTParse.c.)
173  *	This function takes a pointer to a string in which some
174  *	characters may have been encoded in %xy form, where xy is
175  *	the ASCII hex code for character 16x+y.
176  *	The string is converted in place, as it will never grow.
177  */
178     extern char *HTUnEscape(char *str);
179 
180 /*	Decode some %xx escaped characters.		      HTUnEscapeSome()
181  *	-----------------------------------			Klaus Weide
182  *							    (kweide@tezcat.com)
183  *	This function takes a pointer to a string in which some
184  *	characters may have been encoded in %xy form, where xy is
185  *	the ASCII hex code for character 16x+y, and a pointer to
186  *	a second string (do_trans) containing one or more characters
187  *	which should be unescaped if escaped in the first string.
188  *	The first string is converted in place, as it will never grow.
189  */
190     extern char *HTUnEscapeSome(char *str,
191 				const char *do_trans);
192 
193 /*  Turn a string which is not an RFC 822 token into a quoted-string
194  *  (presumably by replacing *str, since nothing is returned). - KW
195  */
196     extern void HTMake822Word(char **str,
197 			      int quoted);
198 
199 #ifdef __cplusplus
200 }
201 #endif
202 #endif				/* HTPARSE_H */
203