1 /* 2 * $LynxId: HTParse.h,v 1.21 2010/09/24 22:45:23 tom Exp $ 3 * HTParse: URL parsing in the WWW Library 4 * HTPARSE 5 * 6 * This module of the WWW library contains code to parse URLs and various 7 * related things. 8 * Implemented by HTParse.c . 9 */ 10 #ifndef HTPARSE_H 11 #define HTPARSE_H 12 13 #ifndef HTUTILS_H 14 #include <HTUtils.h> 15 #endif 16 17 #ifdef __cplusplus 18 extern "C" { 19 #endif 20 /* 21 * The following are flag bits which may be ORed together to form 22 * a number to give the 'wanted' argument to HTParse. 23 */ 24 #define PARSE_ACCESS 16 25 #define PARSE_HOST 8 26 #define PARSE_PATH 4 27 #define PARSE_ANCHOR 2 28 #define PARSE_PUNCTUATION 1 29 #define PARSE_ALL 31 30 #define PARSE_ALL_WITHOUT_ANCHOR (PARSE_ALL ^ PARSE_ANCHOR) 31 /* 32 * Additional flag bits for more details on components already 33 * covered by the above. The PARSE_PATH above doesn't really 34 * strictly refer to the path component in the sense of the URI 35 * specs only, but rather to that combined with a possible query 36 * component. - kw 37 */ 38 #define PARSE_STRICTPATH 32 39 #define PARSE_QUERY 64 40 /* 41 * The following are valid mask values. The terms are the BNF names 42 * in the URL document. 43 */ 44 #define URL_XALPHAS UCH(1) 45 #define URL_XPALPHAS UCH(2) 46 #define URL_PATH UCH(4) 47 /* Strip white space off a string. HTStrip() 48 * ------------------------------- 49 * 50 * On exit, 51 * Return value points to first non-white character, or to 0 if none. 52 * All trailing white space is OVERWRITTEN with zero. 53 */ extern char *HTStrip(char *s); 54 55 /* 56 * Parse a port number 57 * ------------------- 58 * 59 * On entry, 60 * host A pointer to hostname possibly followed by port 61 * 62 * On exit, 63 * returns A pointer to the ":" before the port 64 * sets the port number via the pointer portp. 65 */ 66 extern char *HTParsePort(char *host, int *portp); 67 68 /* Parse a Name relative to another name. HTParse() 69 * -------------------------------------- 70 * 71 * This returns those parts of a name which are given (and requested) 72 * substituting bits from the related name where necessary. 73 * 74 * On entry, 75 * aName A filename given 76 * relatedName A name relative to which aName is to be parsed 77 * wanted A mask for the bits which are wanted. 78 * 79 * On exit, 80 * returns A pointer to a malloc'd string which MUST BE FREED 81 */ 82 extern char *HTParse(const char *aName, 83 const char *relatedName, 84 int wanted); 85 86 /* HTParseAnchor(), fast HTParse() specialization 87 * ---------------------------------------------- 88 * 89 * On exit, 90 * returns A pointer within input string (probably to its end '\0') 91 */ 92 extern const char *HTParseAnchor(const char *aName); 93 94 /* Simplify a filename. HTSimplify() 95 * -------------------- 96 * 97 * A unix-style file is allowed to contain the seqeunce xxx/../ which may 98 * be replaced by "" , and the seqeunce "/./" which may be replaced by "/". 99 * Simplification helps us recognize duplicate filenames. 100 * 101 * Thus, /etc/junk/../fred becomes /etc/fred 102 * /etc/junk/./fred becomes /etc/junk/fred 103 * 104 * but we should NOT change 105 * http://fred.xxx.edu/../.. 106 * 107 * or ../../albert.html 108 */ 109 extern void HTSimplify(char *filename); 110 111 /* Make Relative Name. HTRelative() 112 * ------------------- 113 * 114 * This function creates and returns a string which gives an expression of 115 * one address as related to another. Where there is no relation, an absolute 116 * address is retured. 117 * 118 * On entry, 119 * Both names must be absolute, fully qualified names of nodes 120 * (no anchor bits) 121 * 122 * On exit, 123 * The return result points to a newly allocated name which, if 124 * parsed by HTParse relative to relatedName, will yield aName. 125 * The caller is responsible for freeing the resulting name later. 126 * 127 */ 128 extern char *HTRelative(const char *aName, 129 const char *relatedName); 130 131 /* Escape undesirable characters using % HTEscape() 132 * ------------------------------------- 133 * 134 * This function takes a pointer to a string in which 135 * some characters may be unacceptable are unescaped. 136 * It returns a string which has these characters 137 * represented by a '%' character followed by two hex digits. 138 * 139 * Unlike HTUnEscape(), this routine returns a malloc'd string. 140 */ 141 extern char *HTEscape(const char *str, 142 unsigned mask); 143 144 /* Escape unsafe characters using % HTEscapeUnsafe() 145 * -------------------------------- 146 * 147 * This function takes a pointer to a string in which 148 * some characters may be that may be unsafe are unescaped. 149 * It returns a string which has these characters 150 * represented by a '%' character followed by two hex digits. 151 * 152 * Unlike HTUnEscape(), this routine returns a malloc'd string. 153 */ 154 extern char *HTEscapeUnsafe(const char *str); 155 156 /* Escape undesirable characters using % but space to +. HTEscapeSP() 157 * ----------------------------------------------------- 158 * 159 * This function takes a pointer to a string in which 160 * some characters may be unacceptable are unescaped. 161 * It returns a string which has these characters 162 * represented by a '%' character followed by two hex digits, 163 * except that spaces are converted to '+' instead of %2B. 164 * 165 * Unlike HTUnEscape(), this routine returns a malloc'd string. 166 */ 167 extern char *HTEscapeSP(const char *str, 168 unsigned mask); 169 170 /* Decode %xx escaped characters. HTUnEscape() 171 * ------------------------------ 172 * 173 * This function takes a pointer to a string in which some 174 * characters may have been encoded in %xy form, where xy is 175 * the acsii hex code for character 16x+y. 176 * The string is converted in place, as it will never grow. 177 */ 178 extern char *HTUnEscape(char *str); 179 180 /* Decode some %xx escaped characters. HTUnEscapeSome() 181 * ----------------------------------- Klaus Weide 182 * (kweide@tezcat.com) 183 * This function takes a pointer to a string in which some 184 * characters may have been encoded in %xy form, where xy is 185 * the acsii hex code for character 16x+y, and a pointer to 186 * a second string containing one or more characters which 187 * should be unescaped if escaped in the first string. 188 * The first string is converted in place, as it will never grow. 189 */ 190 extern char *HTUnEscapeSome(char *str, 191 const char *do_trans); 192 193 /* 194 * Turn a string which is not a RFC 822 token into a quoted-string. - KW 195 */ 196 extern void HTMake822Word(char **str, 197 int quoted); 198 199 #ifdef __cplusplus 200 } 201 #endif 202 #endif /* HTPARSE_H */ 203