Version 1.8.0.0
[socat.git] / nestlex.c
blob19d338117a6e82bd88a48e9b9fdbd6f1080c839f
1 /* source: nestlex.c */
2 /* Copyright Gerhard Rieger and contributors (see file CHANGES) */
3 /* Published under the GNU General Public License V.2, see file COPYING */
5 /* a function for lexical scanning of nested character patterns */
7 #include "config.h"
8 #include "mytypes.h"
10 #include "sysincludes.h"
/* Forward declaration of the recursive scanner behind nestlex(); it takes the
   same arguments, except that the remaining output space is a signed count. */
static int _nestlex(const char **addr, char **token, ptrdiff_t *len,
                    const char *ends[],
                    const char *hquotes[], const char *squotes[],
                    const char *nests[],
                    bool dropquotes, bool c_esc, bool html_esc);
/* Scans the input string and copies its value to the output token.
   Scanning ends when an unescaped, unnested string from the ends array is
   found; the end pattern itself is not copied. No trailing \0 is written to
   the token. Escaping with \ and quoting are supported (\ and quotes are
   removed), as is nesting with various parentheses.

   addr       in: input string; afterwards points to the end token
   token      in: output token; afterwards points to the first unwritten char
              (the caller might want to set it to \0)
   len        remaining bytes in token space (incl. \0)
   ends       list of end strings
   hquotes    list of strings that quote (hard quotes)
   squotes    list of strings that quote softly
   nests      list of strings that start nesting; every second one is the
              matching end
   dropquotes drop the outermost quotes
   c_esc      solve C char escapes: \n \t \0 etc
   html_esc   solve HTML char escapes: %0d %08 etc

   returns -1 if the output string was too small
   returns 1 if addr ended unexpectedly
   returns 0 if the token could be extracted successfully
*/
int nestlex(const char **addr,
            char **token,
            size_t *len,
            const char *ends[],
            const char *hquotes[],
            const char *squotes[],
            const char *nests[],
            bool dropquotes,
            bool c_esc,
            bool html_esc)
{
   /* the scanner counts the remaining space as a signed value */
   ptrdiff_t *remaining = (ptrdiff_t *)len;
   int status;

   status = _nestlex(addr, token, remaining, ends, hquotes, squotes, nests,
                     dropquotes, c_esc, html_esc);
   return status;
}
/* Tells whether the text at s begins with the pattern pat. */
static bool nestlex_starts_with(const char *s, const char *pat) {
   return strncmp(s, pat, strlen(pat)) == 0;
}

/* Searches a NULL terminated pattern list for an entry matching the text at
   in. Only every stride-th entry is tried (use 2 for open/close pair tables).
   A NULL list is treated as empty.
   Returns a pointer to the matching entry, or NULL when nothing matches. */
static const char **nestlex_find(const char *patterns[], const char *in,
                                 int stride) {
   const char **entry;

   if (patterns == NULL) {
      return NULL;
   }
   for (entry = patterns; *entry != NULL; entry += stride) {
      if (nestlex_starts_with(in, *entry)) {
         return entry;
      }
   }
   return NULL;
}

/* Maps the character following a backslash to the value of the C escape
   sequence it denotes; characters without special meaning are returned
   unchanged. \x, \u and \U sequences are not implemented. */
static char nestlex_c_escape(char c) {
   switch (c) {
   case '0': return '\0';
   case 'a': return '\a';
   case 'b': return '\b';
   case 'f': return '\f';
   case 'n': return '\n';
   case 'r': return '\r';
   case 't': return '\t';
   case 'v': return '\v';
   default:  return c;
   }
}

/* Appends one byte to the output and decrements the remaining space.
   Returns -1 when there was no room to write, or when the write used up the
   last byte (which is reserved for the caller's \0); otherwise 0. */
static int nestlex_put(char **out, ptrdiff_t *len, char c) {
   if (*len <= 0) {
      return -1;        /* never write into a buffer without space */
   }
   *(*out)++ = c;
   return (--*len <= 0) ? -1 : 0;
}

/* Copies n bytes verbatim from *in to *out, advancing both pointers.
   The caller must have verified that n input bytes are available.
   Returns 0 on success, -1 on output overflow. */
static int nestlex_copy(const char **in, char **out, ptrdiff_t *len,
                        size_t n) {
   for (; n > 0; --n) {
      int rc;

      if (*len <= 0) {
         return -1;
      }
      rc = nestlex_put(out, len, **in);
      ++*in;
      if (rc < 0) {
         return -1;
      }
   }
   return 0;
}

static int nestlex_enclosed(const char **in, char **out, ptrdiff_t *len,
                            const char *open, const char *close, bool strip,
                            const char *hquotes[], const char *squotes[],
                            const char *nests[], bool c_esc, bool html_esc);

/* Recursive worker: scans the string at *addr and copies its value to *token
   until an unescaped, unquoted, unnested pattern from ends is found or the
   input ends. The end pattern is not copied and no trailing \0 is written.

   addr       in: start of input; out: position where scanning stopped
   token      in: start of output; out: first unwritten output byte
   len        remaining bytes in the output space (incl. room for \0)
   ends       NULL terminated list of end patterns (NULL: none)
   hquotes    NULL terminated list of hard quote patterns, or NULL; inside
              them no quote or nest patterns are recognized
   squotes    NULL terminated list of soft quote patterns, or NULL; inside
              them quotes and nests are still recognized
   nests      NULL terminated list of open/close pattern pairs, or NULL;
              nest delimiters are always copied to the output
   dropquotes strip the quote patterns found at this level instead of copying
   c_esc      translate C escapes (\n \t \0 ...) after a backslash
   html_esc   only handed on to recursive calls; not evaluated here

   returns 0 on success (end pattern found or end of input reached)
   returns -1 on output overflow; *addr and *token are updated
   returns 1 if the input ended inside a quote or nest; *addr and *token are
           left untouched (output bytes may already have been written and
           *len decremented)
*/
static int _nestlex(const char **addr,
                    char **token,
                    ptrdiff_t *len,
                    const char *ends[],
                    const char *hquotes[],
                    const char *squotes[],
                    const char *nests[],
                    bool dropquotes,
                    bool c_esc,
                    bool html_esc
                    ) {
   const char *in = *addr;      /* pointer into input string */
   char *out = *token;          /* pointer into output token */

   while (*in != '\0') {
      const char **hit;
      bool enclosed = false;
      int result = 0;
      char c;

      /* first check the end patterns (e.g. for ']') */
      if (nestlex_find(ends, in, 1) != NULL) {
         *addr = in;
         *token = out;
         return 0;
      }

      /* then hard quotes, soft quotes, and nesting, in this order */
      if ((hit = nestlex_find(hquotes, in, 1)) != NULL) {
         /* hard quote: nothing but the closing quote and \ is special */
         result = nestlex_enclosed(&in, &out, len, hit[0], hit[0],
                                   dropquotes, NULL, NULL, NULL,
                                   c_esc, html_esc);
         enclosed = true;
      } else if ((hit = nestlex_find(squotes, in, 1)) != NULL) {
         result = nestlex_enclosed(&in, &out, len, hit[0], hit[0],
                                   dropquotes, hquotes, squotes, nests,
                                   c_esc, html_esc);
         enclosed = true;
      } else if ((hit = nestlex_find(nests, in, 2)) != NULL) {
         /* hit[0] opens the nest, hit[1] is its matching end */
         result = nestlex_enclosed(&in, &out, len, hit[0], hit[1],
                                   false, hquotes, squotes, nests,
                                   c_esc, html_esc);
         enclosed = true;
      }
      if (enclosed) {
         if (result < 0) {
            *addr = in;
            *token = out;
            return result;
         }
         if (result > 0) {
            return result;      /* input ended inside the quote or nest */
         }
         continue;              /* string might continue with more data */
      }

      /* "normal" data, possibly escaped */
      c = *in++;
      if (c == '\\') {
         if (*in == '\0') {
            /* trailing backslash: drop it and stop on the terminator, so
               that *addr never points behind the end of the input */
            break;
         }
         c = *in++;
         if (c_esc) {
            c = nestlex_c_escape(c);
         }
      }
      if (nestlex_put(&out, len, c) < 0) {
         *addr = in;
         *token = out;
         return -1;     /* output overflow */
      }
   }

   /* end of input string */
   *addr = in;
   *token = out;
   return 0;
}

/* Handles one delimited section (quote or nest) starting at *in, which must
   begin with the pattern open. The opening and closing delimiters are skipped
   when strip is set and copied otherwise; the text in between is scanned by
   _nestlex() with close as its only end pattern and the given inner pattern
   lists.
   Returns 0 on success, -1 on output overflow, 1 if the input ended before
   the closing delimiter. */
static int nestlex_enclosed(const char **in, char **out, ptrdiff_t *len,
                            const char *open, const char *close, bool strip,
                            const char *hquotes[], const char *squotes[],
                            const char *nests[], bool c_esc, bool html_esc) {
   const char *inner_ends[2];
   int result;

   if (strip) {
      *in += strlen(open);
   } else if (nestlex_copy(in, out, len, strlen(open)) < 0) {
      return -1;
   }

   inner_ends[0] = close;
   inner_ends[1] = NULL;
   result = _nestlex(in, out, len, inner_ends, hquotes, squotes, nests,
                     false, c_esc, html_esc);
   if (result != 0) {
      return result;
   }

   /* the inner scan also returns 0 at end of input, so make sure the closing
      delimiter is really there before consuming it */
   if (**in == '\0' || !nestlex_starts_with(*in, close)) {
      return 1;
   }

   if (strip) {
      *in += strlen(close);
      return 0;
   }
   return nestlex_copy(in, out, len, strlen(close));
}