1 /* source: nestlex.c */
2 /* Copyright Gerhard Rieger and contributors (see file CHANGES) */
3 /* Published under the GNU General Public License V.2, see file COPYING */
5 /* a function for lexical scanning of nested character patterns */
10 #include "sysincludes.h"
12 static int _nestlex(const char **addr
,
16 const char *hquotes
[],
17 const char *squotes
[],
24 /* sub: scan a string and copy its value to output string
25 end scanning when an unescaped, unnested string from ends array is found
26 does not copy the end pattern
27 does not write a trailing \0 to token
28 allows escaping with \ and quoting (\ and quotes are removed)
29 allows nesting with various kinds of parentheses (see the nests array)
30 returns -1 if out string was too small
31 returns 1 if addr ended unexpectedly
32 returns 0 if token could be extracted successfully
34 int nestlex(const char **addr
, /* input string; aft points to end token */
35 char **token
, /* output token; aft points to first unwritten
36 char (caller might want to set it to \0) */
37 size_t *len
, /* remaining bytes in token space (incl. \0) */
38 const char *ends
[], /* list of end strings */
39 const char *hquotes
[],/* list of strings that quote (hard qu.) */
40 const char *squotes
[],/* list of strings that quote softly */
41 const char *nests
[],/* list of strings that start nesting;
42 every second one is matching end */
43 bool dropquotes
, /* drop the outermost quotes */
44 bool c_esc
, /* solve C char escapes: \n \t \0 etc */
45 bool html_esc
/* solve HTML char escapes: %0d %08 etc */
48 _nestlex(addr
, token
, (ptrdiff_t *)len
, ends
, hquotes
, squotes
, nests
,
49 dropquotes
, c_esc
, html_esc
);
52 static int _nestlex(const char **addr
,
56 const char *hquotes
[],
57 const char *squotes
[],
63 const char *in
= *addr
; /* pointer into input string */
64 const char **endx
; /* loops over end patterns */
65 const char **quotx
; /* loops over quote patterns */
66 const char **nestx
; /* loops over nest patterns */
67 char *out
= *token
; /* pointer into output token */
74 /* is this end of input string? */
77 break; /* end of string */
80 /* first check the end patterns (e.g. for ']') */
83 if (!strncmp(in
, *endx
, strlen(*endx
))) {
84 /* this end pattern matches */
92 /* check for hard quoting pattern */
94 while (hquotes
&& *quotx
) {
95 if (!strncmp(in
, *quotx
, strlen(*quotx
))) {
96 /* this quote pattern matches */
97 const char *endnest
[2];
99 /* we strip this quote */
100 in
+= strlen(*quotx
);
102 for (i
= strlen(*quotx
); i
> 0; --i
) {
104 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
107 /* we call _nestlex recursively */
111 _nestlex(&in
, &out
, len
, endnest
, NULL
/*hquotes*/,
112 NULL
/*squotes*/, NULL
/*nests*/,
113 false, c_esc
, html_esc
);
114 if (result
== 0 && dropquotes
) {
115 /* we strip this quote */
116 in
+= strlen(*quotx
);
117 } else if (result
< 0) {
118 *addr
= in
; *token
= out
; return result
;
120 /* we copy the trailing quote */
121 for (i
= strlen(*quotx
); i
> 0; --i
) {
123 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
131 if (hquotes
&& *quotx
!= NULL
) {
132 /* there was a quote; string might continue with hard quote */
136 /* check for soft quoting pattern */
138 while (squotes
&& *quotx
) {
139 if (!strncmp(in
, *quotx
, strlen(*quotx
))) {
140 /* this quote pattern matches */
141 /* we strip this quote */
142 /* we call _nestlex recursively */
143 const char *endnest
[2];
145 /* we strip this quote */
146 in
+= strlen(*quotx
);
148 for (i
= strlen(*quotx
); i
> 0; --i
) {
150 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
156 _nestlex(&in
, &out
, len
, endnest
, hquotes
,
158 false, c_esc
, html_esc
);
160 if (result
== 0 && dropquotes
) {
161 /* we strip the trailing quote */
162 if (!in
[0] || strncmp(in
, *quotx
, strlen(*quotx
))) return 1;
163 in
+= strlen(*quotx
);
164 } else if (result
< 0) {
165 *addr
= in
; *token
= out
; return result
;
167 /* we copy the trailing quote */
168 for (i
= strlen(*quotx
); i
> 0; --i
) {
170 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
177 if (squotes
&& *quotx
!= NULL
) {
178 /* there was a soft quote; string might continue with any quote */
182 /* check patterns that start a nested clause */
183 nestx
= nests
; i
= 0;
184 while (nests
&& *nestx
) {
185 if (!strncmp(in
, *nestx
, strlen(*nestx
))) {
186 /* this nest pattern matches */
187 const char *endnest
[2];
188 endnest
[0] = nestx
[1];
191 for (i
= strlen(nestx
[1]); i
> 0; --i
) {
193 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
197 _nestlex(&in
, &out
, len
, endnest
, hquotes
, squotes
, nests
,
198 false, c_esc
, html_esc
);
201 i
= strlen(nestx
[1]); while (i
> 0) {
210 } else if (result
< 0) {
211 *addr
= in
; *token
= out
; return result
;
215 nestx
+= 2; /* skip matching end pattern in table */
217 if (nests
&& *nestx
) {
218 /* we handled a nested expression, continue loop */
222 /* "normal" data, possibly escaped */
225 /* found a plain \ escaped part */
227 if (c
== 0) { /* Warn("trailing '\\'");*/ break; }
228 if (c_esc
) { /* solve C char escapes: \n \t \0 etc */
230 case '0': c
= '\0'; break;
231 case 'a': c
= '\a'; break;
232 case 'b': c
= '\b'; break;
233 case 'f': c
= '\f'; break;
234 case 'n': c
= '\n'; break;
235 case 'r': c
= '\r'; break;
236 case 't': c
= '\t'; break;
237 case 'v': c
= '\v'; break;
239 case 'x': !!! 1 to
2 hex digits
; break;
240 case 'u': !!! 4 hex digits
?; break;
241 case 'U': !!! 8 hex digits
?; break;
251 return -1; /* output overflow */
256 /* just a simple char */
262 return -1; /* output overflow */
266 /* never come here? */