xmlParser.cc revision 10152:52c552138ba1
1/**
2 ****************************************************************************
3 * <P> XML.c - implementation file for basic XML parser written in ANSI C++
4 * for portability. It works by using recursion and a node tree for breaking
5 * down the elements of an XML document.  </P>
6 *
7 * @version     V2.41
8 * @author      Frank Vanden Berghen
9 *
10 * NOTE:
11 *
12 *   If you add "#define STRICT_PARSING", on the first line of this file
13 *   the parser will see the following XML-stream:
14 *      <a><b>some text</b><b>other text    </a>
15 *   as an error. Otherwise, this string will be equivalent to:
16 *      <a><b>some text</b><b>other text</b></a>
17 *
18 * NOTE:
19 *
20 *   If you add "#define APPROXIMATE_PARSING" on the first line of this file
21 *   the parser will see the following XML-stream:
22 *     <data name="n1">
23 *     <data name="n2">
24 *     <data name="n3" />
25 *   as equivalent to the following XML-stream:
26 *     <data name="n1" />
27 *     <data name="n2" />
28 *     <data name="n3" />
29 *   This can be useful for badly-formed XML-streams but prevents the use
30 *   of the following XML-stream (problem is: tags at contiguous levels
31 *   have the same names):
32 *     <data name="n1">
33 *        <data name="n2">
34 *            <data name="n3" />
35 *        </data>
36 *     </data>
37 *
38 * NOTE:
39 *
40 *   If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file
41 *   the "openFileHelper" function will always display error messages inside the
42 *   console instead of inside a message-box-window. Message-box-windows are
43 *   available on windows 9x/NT/2000/XP/Vista only.
44 *
45 * The following license terms for the "XMLParser library from Business-Insight" apply to projects
46 * that are in some way related to
47 * the "mcpat project", including applications
48 * using "mcpat project" and tools developed
49 * for enhancing "mcpat project". All other projects
50 * (not related to "mcpat project") have to use the "XMLParser library from Business-Insight"
51 * code under the Aladdin Free Public License (AFPL)
52 * See the file "AFPL-license.txt" for more information about the AFPL license.
53 * (see http://www.artifex.com/downloads/doc/Public.htm for detailed AFPL terms)
54 *
55 * Redistribution and use of the "XMLParser library from Business-Insight" in source and binary forms, with or without
56 * modification, are permitted provided that the following conditions are met:
57 *     * Redistributions of source code must retain the above copyright
58 *       notice, this list of conditions and the following disclaimer.
59 *     * Redistributions in binary form must reproduce the above copyright
60 *       notice, this list of conditions and the following disclaimer in the
61 *       documentation and/or other materials provided with the distribution.
62 *     * Neither the name of Frank Vanden Berghen nor the
63 *       names of its contributors may be used to endorse or promote products
64 *       derived from this software without specific prior written permission.
65 *
66 * THIS SOFTWARE IS PROVIDED BY Business-Insight ``AS IS'' AND ANY
67 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
68 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
69 * DISCLAIMED. IN NO EVENT SHALL Business-Insight BE LIABLE FOR ANY
70 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
71 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
72 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
73 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
74 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
75 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
76 *
77 * Copyright (c) 2002, Business-Insight
78 * <a href="http://www.Business-Insight.com">Business-Insight</a>
79 * All rights reserved.
80 *
81 ****************************************************************************
82 */
83#ifndef _CRT_SECURE_NO_DEPRECATE
84#define _CRT_SECURE_NO_DEPRECATE
85#endif
86#include "xmlParser.h"
87#ifdef _XMLWINDOWS
88//#ifdef _DEBUG
89//#define _CRTDBG_MAP_ALLOC
90//#include <crtdbg.h>
91//#endif
92#define WIN32_LEAN_AND_MEAN
93#include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files
94                     // to have "MessageBoxA" to display error messages for openFileHelper
95#endif
96
97#include <memory.h>
98
99#include <cassert>
100#include <cstdio>
101#include <cstdlib>
102#include <cstring>
103
104XMLCSTR XMLNode::getVersion() { return _CXML("v2.39"); }
105void freeXMLString(XMLSTR t){if(t)free(t);}
106
// File-local parser settings.
// characterEncoding: encoding assumed for char-based strings; initialised to UTF-8.
// It is consulted by myWideCharToMultiByte() (Windows build) and writeToFile().
107static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8;
// Three option flags, all initialised to 1. openFileHelper() passes them
// unchanged to setGlobalOptions() together with the guessed encoding.
108static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1;
109
// Smaller of two ints. When the two values compare equal, t2 is returned.
inline int mmin( const int t1, const int t2 )
{
    if (t1 < t2) return t1;
    return t2;
}
111
112// You can modify the initialization of the variable "XMLClearTags" below
113// to change the clearTags that are currently recognized by the library.
114// The number on the second columns is the length of the string inside the
115// first column. The "<!DOCTYPE" declaration must be the second in the list.
116// The "<!--" declaration must be the third in the list.
//
// Each entry holds an opening marker (lpszOpen), the length of that marker in
// characters (openTagLen) and the matching closing marker (lpszClose).
// NOTE(review): the final all-NULL entry looks like an end-of-table sentinel;
// confirm against the code that walks this table.
117typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag;
118static ALLXMLClearTag XMLClearTags[] =
119{
120    {    _CXML("<![CDATA["),9,  _CXML("]]>")      },
121    {    _CXML("<!DOCTYPE"),9,  _CXML(">")        },
122    {    _CXML("<!--")     ,4,  _CXML("-->")      },
123    {    _CXML("<PRE>")    ,5,  _CXML("</PRE>")   },
124//  {    _CXML("<Script>") ,8,  _CXML("</Script>")},
125    {    NULL              ,0,  NULL           }
126};
127
128// You can modify the initialization of the variable "XMLEntities" below
129// to change the character entities that are currently recognized by the library.
130// The number on the second columns is the length of the string inside the
131// first column. Additionally, the syntaxes "&#xA0;" and "&#160;" are recognized.
//
// Each entry maps an escape sequence (s, l characters long) to the single
// character it stands for (c). The entry whose "s" is NULL ends the table:
// the encoder loops in this file iterate "while (entity->s)".
132typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity;
133static XMLCharacterEntity XMLEntities[] =
134{
135    { _CXML("&amp;" ), 5, _CXML('&' )},
136    { _CXML("&lt;"  ), 4, _CXML('<' )},
137    { _CXML("&gt;"  ), 4, _CXML('>' )},
138    { _CXML("&quot;"), 6, _CXML('\"')},
139    { _CXML("&apos;"), 6, _CXML('\'')},
140    { NULL           , 0, '\0'    }
141};
142
143// When rendering the XMLNode to a string (using the "createXMLString" function),
144// you can ask for a beautiful formatting. This formatting is using the
145// following indentation character:
// (currently a horizontal tab, wrapped in _CXML so it has the library's character type)
146#define INDENTCHAR _CXML('\t')
147
148// The following function parses the XML errors into a user friendly string.
149// You can edit this to change the output language of the library to something else.
150XMLCSTR XMLNode::getError(XMLError xerror)
151{
152    switch (xerror)
153    {
154    case eXMLErrorNone:                  return _CXML("No error");
155    case eXMLErrorMissingEndTag:         return _CXML("Warning: Unmatched end tag");
156    case eXMLErrorNoXMLTagFound:         return _CXML("Warning: No XML tag found");
157    case eXMLErrorEmpty:                 return _CXML("Error: No XML data");
158    case eXMLErrorMissingTagName:        return _CXML("Error: Missing start tag name");
159    case eXMLErrorMissingEndTagName:     return _CXML("Error: Missing end tag name");
160    case eXMLErrorUnmatchedEndTag:       return _CXML("Error: Unmatched end tag");
161    case eXMLErrorUnmatchedEndClearTag:  return _CXML("Error: Unmatched clear tag end");
162    case eXMLErrorUnexpectedToken:       return _CXML("Error: Unexpected token found");
163    case eXMLErrorNoElements:            return _CXML("Error: No elements found");
164    case eXMLErrorFileNotFound:          return _CXML("Error: File not found");
165    case eXMLErrorFirstTagNotFound:      return _CXML("Error: First Tag not found");
166    case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity");
167    case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode.");
168    case eXMLErrorCharConversionError:   return _CXML("Error: unable to convert between WideChar and MultiByte chars");
169    case eXMLErrorCannotOpenWriteFile:   return _CXML("Error: unable to open file for writing");
170    case eXMLErrorCannotWriteFile:       return _CXML("Error: cannot write into file");
171
172    case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4");
173    case eXMLErrorBase64DecodeTruncatedData:      return _CXML("Warning: Base64-string is truncated");
174    case eXMLErrorBase64DecodeIllegalCharacter:   return _CXML("Error: Base64-string contains an illegal character");
175    case eXMLErrorBase64DecodeBufferTooSmall:     return _CXML("Error: Base64 decode output buffer is too small");
176    };
177    return _CXML("Unknown");
178}
179
180/////////////////////////////////////////////////////////////////////////
181//      Here start the abstraction layer to be OS-independent          //
182/////////////////////////////////////////////////////////////////////////
183
184// Here is an abstraction layer to access some common string manipulation functions.
185// The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0,
186// Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++.
187// If you plan to "port" the library to a new system/compiler, all you have to do is
188// to edit the following lines.
// myIsTextWideChar(b,len): heuristic that decides whether the first "len" bytes
// at "b" look like wide-character text. Returns TRUE or FALSE as a char.
// Three variants exist:
//   - XML_NO_WIDE_CHAR: always FALSE;
//   - Windows CE or non-Windows: the hand-written heuristic below;
//   - other Windows builds: delegates to the Win32 IsTextUnicode() API.
189#ifdef XML_NO_WIDE_CHAR
190char myIsTextWideChar(const void *b, int len) { return FALSE; }
191#else
192    #if defined (UNDER_CE) || !defined(_XMLWINDOWS)
193    char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode
194    {
195#ifdef sun
196        // for SPARC processors: wchar_t* buffers must always be aligned, otherwise it's a char* buffer.
197        if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE;
198#endif
199        const wchar_t *s=(const wchar_t*)b;
200
201        // buffer too small:
202        if (len<(int)sizeof(wchar_t)) return FALSE;
203
204        // odd length test
205        if (len&1) return FALSE;
206
207        /* only checks the first 256 characters */
        // from here on "len" counts wchar_t elements, not bytes
208        len=mmin(256,len/sizeof(wchar_t));
209
210        // Check for the special byte order:
        // (only the first 16 bits of the buffer are inspected for the byte-order mark)
211        if (*((unsigned short*)s) == 0xFFFE) return TRUE;     // IS_TEXT_UNICODE_REVERSE_SIGNATURE;
212        if (*((unsigned short*)s) == 0xFEFF) return TRUE;      // IS_TEXT_UNICODE_SIGNATURE
213
214        // checks for ASCII characters in the UNICODE stream
        // (wide text if more than half of the inspected elements have a value <= 255)
215        int i,stats=0;
216        for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++;
217        if (stats>len/2) return TRUE;
218
219        // Check for UNICODE NULL chars
220        for (i=0; i<len; i++) if (!s[i]) return TRUE;
221
222        return FALSE;
223    }
224    #else
225    char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); };
226    #endif
227#endif
228
229#ifdef _XMLWINDOWS
230// for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
231    #ifdef _XMLWIDECHAR
232        wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
233        {
234            int i;
235            if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0             ,s,-1,NULL,0);
236            else                            i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,NULL,0);
237            if (i<0) return NULL;
238            wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR));
239            if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0             ,s,-1,d,i);
240            else                            i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,d,i);
241            d[i]=0;
242            return d;
243        }
244        static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); }
245        static inline int xstrlen(XMLCSTR c)   { return (int)wcslen(c); }
246        static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _wcsnicmp(c1,c2,l);}
247        static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
248        static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); }
249        static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
250        static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
251    #else
252        char *myWideCharToMultiByte(const wchar_t *s)
253        {
254            UINT codePage=CP_ACP; if (characterEncoding==XMLNode::char_encoding_UTF8) codePage=CP_UTF8;
255            int i=(int)WideCharToMultiByte(codePage,  // code page
256                0,                       // performance and mapping flags
257                s,                       // wide-character string
258                -1,                       // number of chars in string
259                NULL,                       // buffer for new string
260                0,                       // size of buffer
261                NULL,                    // default for unmappable chars
262                NULL                     // set when default char used
263                );
264            if (i<0) return NULL;
265            char *d=(char*)malloc(i+1);
266            WideCharToMultiByte(codePage,  // code page
267                0,                       // performance and mapping flags
268                s,                       // wide-character string
269                -1,                       // number of chars in string
270                d,                       // buffer for new string
271                i,                       // size of buffer
272                NULL,                    // default for unmappable chars
273                NULL                     // set when default char used
274                );
275            d[i]=0;
276            return d;
277        }
278        static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
279        static inline int xstrlen(XMLCSTR c)   { return (int)strlen(c); }
280        #ifdef __BORLANDC__
281            static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strnicmp(c1,c2,l);}
282            static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return stricmp(c1,c2); }
283        #else
284            static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);}
285            static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); }
286        #endif
287        static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
288        static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
289        static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
290    #endif
291#else
292// for gcc and CC
293    #ifdef XML_NO_WIDE_CHAR
294        char *myWideCharToMultiByte(const wchar_t *s) { return NULL; }
295    #else
296        char *myWideCharToMultiByte(const wchar_t *s)
297        {
298            const wchar_t *ss=s;
299            int i=(int)wcsrtombs(NULL,&ss,0,NULL);
300            if (i<0) return NULL;
301            char *d=(char *)malloc(i+1);
302            wcsrtombs(d,&s,i,NULL);
303            d[i]=0;
304            return d;
305        }
306    #endif
307    #ifdef _XMLWIDECHAR
308        wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
309        {
310            const char *ss=s;
311            int i=(int)mbsrtowcs(NULL,&ss,0,NULL);
312            if (i<0) return NULL;
313            wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t));
314            mbsrtowcs(d,&s,i,NULL);
315            d[i]=0;
316            return d;
317        }
318        int xstrlen(XMLCSTR c)   { return wcslen(c); }
319        #ifdef sun
320        // for CC
321           #include <widec.h>
322           static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);}
323           static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);}
324           static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); }
325        #else
326        // for gcc
327           static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);}
328           static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
329           static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); }
330        #endif
331        static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
332        static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
333        static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode)
334        {
335            char *filenameAscii=myWideCharToMultiByte(filename);
336            FILE *f;
337            if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb");
338            else                     f=fopen(filenameAscii,"wb");
339            free(filenameAscii);
340            return f;
341        }
342    #else
343        static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
344        static inline int xstrlen(XMLCSTR c)   { return strlen(c); }
345        static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);}
346        static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
347        static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); }
348        static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
349        static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
350    #endif
351    static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);}
352#endif
353
354
355///////////////////////////////////////////////////////////////////////////////
356//            the "xmltoc,xmltob,xmltoi,xmltol,xmltof,xmltoa" functions      //
357///////////////////////////////////////////////////////////////////////////////
358// These 6 functions are not used inside the XMLparser.
359// There are only here as "convenience" functions for the user.
360// If you don't need them, you can delete them without any trouble.
361#ifdef _XMLWIDECHAR
362    #ifdef _XMLWINDOWS
363    // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
364        char    xmltob(XMLCSTR t,int     v){ if (t&&(*t)) return (char)_wtoi(t); return v; }
365        int     xmltoi(XMLCSTR t,int     v){ if (t&&(*t)) return _wtoi(t); return v; }
366        long    xmltol(XMLCSTR t,long    v){ if (t&&(*t)) return _wtol(t); return v; }
367        double  xmltof(XMLCSTR t,double  v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; }
368    #else
369        #ifdef sun
370        // for CC
371           #include <widec.h>
372           char    xmltob(XMLCSTR t,int     v){ if (t) return (char)wstol(t,NULL,10); return v; }
373           int     xmltoi(XMLCSTR t,int     v){ if (t) return (int)wstol(t,NULL,10); return v; }
374           long    xmltol(XMLCSTR t,long    v){ if (t) return wstol(t,NULL,10); return v; }
375        #else
376        // for gcc
377           char    xmltob(XMLCSTR t,int     v){ if (t) return (char)wcstol(t,NULL,10); return v; }
378           int     xmltoi(XMLCSTR t,int     v){ if (t) return (int)wcstol(t,NULL,10); return v; }
379           long    xmltol(XMLCSTR t,long    v){ if (t) return wcstol(t,NULL,10); return v; }
380        #endif
381                double  xmltof(XMLCSTR t,double  v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; }
382    #endif
383#else
384    char    xmltob(XMLCSTR t,char    v){ if (t&&(*t)) return (char)atoi(t); return v; }
385    int     xmltoi(XMLCSTR t,int     v){ if (t&&(*t)) return atoi(t); return v; }
386    long    xmltol(XMLCSTR t,long    v){ if (t&&(*t)) return atol(t); return v; }
387    double  xmltof(XMLCSTR t,double  v){ if (t&&(*t)) return atof(t); return v; }
388#endif
389XMLCSTR xmltoa(XMLCSTR t,XMLCSTR v){ if (t)       return  t; return v; }
390XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; }
391
392/////////////////////////////////////////////////////////////////////////
393//                    the "openFileHelper" function                    //
394/////////////////////////////////////////////////////////////////////////
395
396// Since each application has its own way to report and deal with errors, you should modify & rewrite
397// the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs.
398XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag)
399{
400    // guess the value of the global parameter "characterEncoding"
401    // (the guess is based on the first 200 bytes of the file).
402    FILE *f=xfopen(filename,_CXML("rb"));
403    if (f)
404    {
405        char bb[205];
406        int l=(int)fread(bb,1,200,f);
407        setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText);
408        fclose(f);
409    }
410
411    // parse the file
412    XMLResults pResults;
413    XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults);
414
415    // display error message (if any)
416    if (pResults.error != eXMLErrorNone)
417    {
418        // create message
419        char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML("");
420        if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; }
421        sprintf(message,
422#ifdef _XMLWIDECHAR
423            "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s"
424#else
425            "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s"
426#endif
427            ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3);
428
429        // display message
430#if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_)
431        MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST);
432#else
433        printf("%s",message);
434#endif
435        exit(255);
436    }
437    return xnode;
438}
439
440/////////////////////////////////////////////////////////////////////////
441//      Here start the core implementation of the XMLParser library    //
442/////////////////////////////////////////////////////////////////////////
443
444// You should normally not change anything below this point.
445
// Character-length lookup tables (char build only).
// Each table has 256 entries and is indexed by the first byte of a character;
// the entry is the number of bytes that character occupies. Entry 0 is 0 in
// every table.
446#ifndef _XMLWIDECHAR
447// If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte.
448// If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes).
449// If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes).
450// This table is used as lookup-table to know the length of a character (in byte) based on the
451// content of the first byte of the character.
452// (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).
// UTF-8: lead bytes marked invalid in the row comments are counted as 1 byte.
453static const char XML_utf8ByteTable[256] =
454{
455    //  0 1 2 3 4 5 6 7 8 9 a b c d e f
456    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
457    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
458    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
459    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
460    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
461    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
462    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
463    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range
464    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid
465    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
466    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
467    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
468    1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte
469    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
470    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte
471    4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
472};
// Single-byte encodings: every non-zero byte is a complete 1-byte character.
473static const char XML_legacyByteTable[256] =
474{
475    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
476    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
477    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
478    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
479    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
480    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
481};
// Shift-JIS: the lead-byte ranges named in the row comments are 2 bytes long.
482static const char XML_sjisByteTable[256] =
483{
484    //  0 1 2 3 4 5 6 7 8 9 a b c d e f
485    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
486    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
487    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
488    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
489    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
490    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
491    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
492    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
493    1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes
494    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
495    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
496    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
497    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0
498    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0
499    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes
500    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0
501};
// GB2312: lead bytes 0xa1 to 0xf7 are 2 bytes long.
502static const char XML_gb2312ByteTable[256] =
503{
504//  0 1 2 3 4 5 6 7 8 9 a b c d e f
505    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
506    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
507    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
508    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
509    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
510    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
511    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
512    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
513    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80
514    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
515    1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes
516    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
517    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
518    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
519    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
520    2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0
521};
// GBK / Big5: lead bytes 0x81 to 0xfe are 2 bytes long.
522static const char XML_gbk_big5_ByteTable[256] =
523{
524    //  0 1 2 3 4 5 6 7 8 9 a b c d e f
525    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
526    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
527    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
528    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
529    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
530    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
531    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
532    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
533    1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes
534    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
535    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0
536    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
537    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
538    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
539    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
540    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0
541};
// Table currently in effect; the string encoders in this file index it with
// the first byte of each character.
// NOTE(review): presumably re-pointed when the global encoding changes; that code is not in view.
542static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8"
543#endif
544
545
// Definitions of XMLNode's static "empty" objects: a default-constructed node,
// and a clear-tag record and an attribute record whose fields are all NULL.
// NOTE(review): presumably returned by accessors when nothing is found; confirm in xmlParser.h.
546XMLNode XMLNode::emptyXMLNode;
547XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};
548XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};
549
550// Enumeration used to decipher what type a token is
// (where a token has a fixed spelling, it is shown in the trailing comment)
551typedef enum XMLTokenTypeTag
552{
553    eTokenText = 0,
554    eTokenQuotedText,
555    eTokenTagStart,         /* "<"            */
556    eTokenTagEnd,           /* "</"           */
557    eTokenCloseTag,         /* ">"            */
558    eTokenEquals,           /* "="            */
559    eTokenDeclaration,      /* "<?"           */
560    eTokenShortHandClose,   /* "/>"           */
561    eTokenClear,
562    eTokenError
563} XMLTokenType;
564
565// Main structure used for parsing XML
// NOTE(review): the code that fills and reads these fields is outside this
// part of the file. Judging by the names only: lpXML is presumably the input
// buffer, nIndex the current position in it, "error" the last parser error,
// and the lp*/cb* pairs pointer/length pairs -- confirm against the parser
// code before relying on this.
566typedef struct XML
567{
568    XMLCSTR                lpXML;
569    XMLCSTR                lpszText;
570    int                    nIndex,nIndexMissigEndTag;
571    enum XMLError          error;
572    XMLCSTR                lpEndTag;
573    int                    cbEndTag;
574    XMLCSTR                lpNewElement;
575    int                    cbNewElement;
576    int                    nFirst;
577} XML;
578
// A token result: a pointer into the XMLClearTags table (pClr) and a string
// pointer (pStr).
// NOTE(review): presumably pClr is set for eTokenClear tokens and pStr points
// at the token text; confirm against the tokenizer.
579typedef struct
580{
581    ALLXMLClearTag *pClr;
582    XMLCSTR     pStr;
583} NextToken;
584
585// Enumeration used when parsing attributes
// (three states: attribute name, "=" sign, attribute value)
586typedef enum Attrib
587{
588    eAttribName = 0,
589    eAttribEquals,
590    eAttribValue
591} Attrib;
592
593// Enumeration used when parsing elements to dictate whether we are currently
594// inside a tag
// (eInsideTag has the value 0, eOutsideTag the value 1)
595typedef enum Status
596{
597    eInsideTag = 0,
598    eOutsideTag
599} Status;
600
601XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const
602{
603    if (!d) return eXMLErrorNone;
604    FILE *f=xfopen(filename,_CXML("wb"));
605    if (!f) return eXMLErrorCannotOpenWriteFile;
606#ifdef _XMLWIDECHAR
607    unsigned char h[2]={ 0xFF, 0xFE };
608    if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile;
609    if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
610    {
611        if (!fwrite(L"<?xml version=\"1.0\" encoding=\"utf-16\"?>\n",sizeof(wchar_t)*40,1,f))
612            return eXMLErrorCannotWriteFile;
613    }
614#else
615    if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
616    {
617        if (characterEncoding==char_encoding_UTF8)
618        {
619            // header so that windows recognize the file as UTF-8:
620            unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
621            encoding="utf-8";
622        } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS";
623
624        if (!encoding) encoding="ISO-8859-1";
625        if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) return eXMLErrorCannotWriteFile;
626    } else
627    {
628        if (characterEncoding==char_encoding_UTF8)
629        {
630            unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
631        }
632    }
633#endif
634    int i;
635    XMLSTR t=createXMLString(nFormat,&i);
636    if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile;
637    if (fclose(f)!=0) return eXMLErrorCannotWriteFile;
638    free(t);
639    return eXMLErrorNone;
640}
641
642// Duplicate a given string.
643XMLSTR stringDup(XMLCSTR lpszData, int cbData)
644{
645    if (lpszData==NULL) return NULL;
646
647    XMLSTR lpszNew;
648    if (cbData==-1) cbData=(int)xstrlen(lpszData);
649    lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR));
650    if (lpszNew)
651    {
652        memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR));
653        lpszNew[cbData] = (XMLCHAR)NULL;
654    }
655    return lpszNew;
656}
657
658XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source)
659{
660    XMLSTR dd=dest;
661    XMLCHAR ch;
662    XMLCharacterEntity *entity;
663    while ((ch=*source))
664    {
665        entity=XMLEntities;
666        do
667        {
668            if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; }
669            entity++;
670        } while(entity->s);
671#ifdef _XMLWIDECHAR
672        *(dest++)=*(source++);
673#else
674        switch(XML_ByteTable[(unsigned char)ch])
675        {
676        case 4: *(dest++)=*(source++);
677        case 3: *(dest++)=*(source++);
678        case 2: *(dest++)=*(source++);
679        case 1: *(dest++)=*(source++);
680        }
681#endif
682out_of_loop1:
683        ;
684    }
685    *dest=0;
686    return dd;
687}
688
689// private (used while rendering):
690int ToXMLStringTool::lengthXMLString(XMLCSTR source)
691{
692    int r=0;
693    XMLCharacterEntity *entity;
694    XMLCHAR ch;
695    while ((ch=*source))
696    {
697        entity=XMLEntities;
698        do
699        {
700            if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; }
701            entity++;
702        } while(entity->s);
703#ifdef _XMLWIDECHAR
704        r++; source++;
705#else
706        ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch;
707#endif
708out_of_loop1:
709        ;
710    }
711    return r;
712}
713
714ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); }
715void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; }
716XMLSTR ToXMLStringTool::toXML(XMLCSTR source)
717{
718    int l=lengthXMLString(source)+1;
719    if (l>buflen) { buflen=l; buf=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR)); }
720    return toXMLUnSafe(buf,source);
721}
722
723// private:
724XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML)
725{
726    // This function is the opposite of the function "toXMLString". It decodes the escape
727    // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters
728    // &,",',<,>. This function is used internally by the XML Parser. All the calls to
729    // the XML library will always gives you back "decoded" strings.
730    //
731    // in: string (s) and length (lo) of string
732    // out:  new allocated string converted from xml
733    if (!s) return NULL;
734
735    int ll=0,j;
736    XMLSTR d;
737    XMLCSTR ss=s;
738    XMLCharacterEntity *entity;
739    while ((lo>0)&&(*s))
740    {
741        if (*s==_CXML('&'))
742        {
743            if ((lo>2)&&(s[1]==_CXML('#')))
744            {
745                s+=2; lo-=2;
746                if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; }
747                while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++;
748                if (*s!=_CXML(';'))
749                {
750                    pXML->error=eXMLErrorUnknownCharacterEntity;
751                    return NULL;
752                }
753                s++; lo--;
754            } else
755            {
756                entity=XMLEntities;
757                do
758                {
759                    if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; }
760                    entity++;
761                } while(entity->s);
762                if (!entity->s)
763                {
764                    pXML->error=eXMLErrorUnknownCharacterEntity;
765                    return NULL;
766                }
767            }
768        } else
769        {
770#ifdef _XMLWIDECHAR
771            s++; lo--;
772#else
773            j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1;
774#endif
775        }
776        ll++;
777    }
778
779    d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR));
780    s=d;
781    while (ll-->0)
782    {
783        if (*ss==_CXML('&'))
784        {
785            if (ss[1]==_CXML('#'))
786            {
787                ss+=2; j=0;
788                if ((*ss==_CXML('X'))||(*ss==_CXML('x')))
789                {
790                    ss++;
791                    while (*ss!=_CXML(';'))
792                    {
793                        if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0');
794                        else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10;
795                        else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10;
796                        else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
797                        ss++;
798                    }
799                } else
800                {
801                    while (*ss!=_CXML(';'))
802                    {
803                        if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0');
804                        else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
805                        ss++;
806                    }
807                }
808#ifndef _XMLWIDECHAR
809                if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;}
810#endif
811                (*d++)=(XMLCHAR)j; ss++;
812            } else
813            {
814                entity=XMLEntities;
815                do
816                {
817                    if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; }
818                    entity++;
819                } while(entity->s);
820            }
821        } else
822        {
823#ifdef _XMLWIDECHAR
824            *(d++)=*(ss++);
825#else
826            switch(XML_ByteTable[(unsigned char)*ss])
827            {
828            case 4: *(d++)=*(ss++); ll--;
829            case 3: *(d++)=*(ss++); ll--;
830            case 2: *(d++)=*(ss++); ll--;
831            case 1: *(d++)=*(ss++);
832            }
833#endif
834        }
835    }
836    *d=0;
837    return (XMLSTR)s;
838}
839
// True when `ch` is one of the four white-space characters recognised by the
// parser: line feed, space, tab or carriage return.
// The argument is parenthesised so that any expression can be passed; it is
// still evaluated up to four times and must therefore be free of side effects.
// The expansion is enclosed in its own parentheses, which is what makes the
// "if XML_isSPACECHAR(x)" / "while XML_isSPACECHAR(x)" spelling used in this
// file valid.
#define XML_isSPACECHAR(ch) (((ch)==_CXML('\n'))||((ch)==_CXML(' '))||((ch)==_CXML('\t'))||((ch)==_CXML('\r')))
841
842// private:
843char myTagCompare(XMLCSTR cclose, XMLCSTR copen)
844// !!!! WARNING strange convention&:
845// return 0 if equals
846// return 1 if different
847{
848    if (!cclose) return 1;
849    int l=(int)xstrlen(cclose);
850    if (xstrnicmp(cclose, copen, l)!=0) return 1;
851    const XMLCHAR c=copen[l];
852    if (XML_isSPACECHAR(c)||
853        (c==_CXML('/' ))||
854        (c==_CXML('<' ))||
855        (c==_CXML('>' ))||
856        (c==_CXML('=' ))) return 0;
857    return 1;
858}
859
860// Obtain the next character from the string.
861static inline XMLCHAR getNextChar(XML *pXML)
862{
863    XMLCHAR ch = pXML->lpXML[pXML->nIndex];
864#ifdef _XMLWIDECHAR
865    if (ch!=0) pXML->nIndex++;
866#else
867    pXML->nIndex+=XML_ByteTable[(unsigned char)ch];
868#endif
869    return ch;
870}
871
872// Find the next token in a string.
873// pcbToken contains the number of characters that have been read.
874static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType)
875{
876    NextToken        result;
877    XMLCHAR            ch;
878    XMLCHAR            chTemp;
879    int              indexStart,nFoundMatch,nIsText=FALSE;
880    result.pClr=NULL; // prevent warning
881
882    // Find next non-white space character
883    do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch);
884
885    if (ch)
886    {
887        // Cache the current string pointer
888        result.pStr = &pXML->lpXML[indexStart];
889
890        // First check whether the token is in the clear tag list (meaning it
891        // does not need formatting).
892        ALLXMLClearTag *ctag=XMLClearTags;
893        do
894        {
895            if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0)
896            {
897                result.pClr=ctag;
898                pXML->nIndex+=ctag->openTagLen-1;
899                *pType=eTokenClear;
900                return result;
901            }
902            ctag++;
903        } while(ctag->lpszOpen);
904
905        // If we didn't find a clear tag then check for standard tokens
906        switch(ch)
907        {
908        // Check for quotes
909        case _CXML('\''):
910        case _CXML('\"'):
911            // Type of token
912            *pType = eTokenQuotedText;
913            chTemp = ch;
914
915            // Set the size
916            nFoundMatch = FALSE;
917
918            // Search through the string to find a matching quote
919            while((ch = getNextChar(pXML)))
920            {
921                if (ch==chTemp) { nFoundMatch = TRUE; break; }
922                if (ch==_CXML('<')) break;
923            }
924
925            // If we failed to find a matching quote
926            if (nFoundMatch == FALSE)
927            {
928                pXML->nIndex=indexStart+1;
929                nIsText=TRUE;
930                break;
931            }
932
933//  4.02.2002
934//            if (FindNonWhiteSpace(pXML)) pXML->nIndex--;
935
936            break;
937
938        // Equals (used with attribute values)
939        case _CXML('='):
940            *pType = eTokenEquals;
941            break;
942
943        // Close tag
944        case _CXML('>'):
945            *pType = eTokenCloseTag;
946            break;
947
948        // Check for tag start and tag end
949        case _CXML('<'):
950
951            // Peek at the next character to see if we have an end tag '</',
952            // or an xml declaration '<?'
953            chTemp = pXML->lpXML[pXML->nIndex];
954
955            // If we have a tag end...
956            if (chTemp == _CXML('/'))
957            {
958                // Set the type and ensure we point at the next character
959                getNextChar(pXML);
960                *pType = eTokenTagEnd;
961            }
962
963            // If we have an XML declaration tag
964            else if (chTemp == _CXML('?'))
965            {
966
967                // Set the type and ensure we point at the next character
968                getNextChar(pXML);
969                *pType = eTokenDeclaration;
970            }
971
972            // Otherwise we must have a start tag
973            else
974            {
975                *pType = eTokenTagStart;
976            }
977            break;
978
979        // Check to see if we have a short hand type end tag ('/>').
980        case _CXML('/'):
981
982            // Peek at the next character to see if we have a short end tag '/>'
983            chTemp = pXML->lpXML[pXML->nIndex];
984
985            // If we have a short hand end tag...
986            if (chTemp == _CXML('>'))
987            {
988                // Set the type and ensure we point at the next character
989                getNextChar(pXML);
990                *pType = eTokenShortHandClose;
991                break;
992            }
993
994            // If we haven't found a short hand closing tag then drop into the
995            // text process
996
997        // Other characters
998        default:
999            nIsText = TRUE;
1000        }
1001
1002        // If this is a TEXT node
1003        if (nIsText)
1004        {
1005            // Indicate we are dealing with text
1006            *pType = eTokenText;
1007            while((ch = getNextChar(pXML)))
1008            {
1009                if XML_isSPACECHAR(ch)
1010                {
1011                    indexStart++; break;
1012
1013                } else if (ch==_CXML('/'))
1014                {
1015                    // If we find a slash then this maybe text or a short hand end tag
1016                    // Peek at the next character to see it we have short hand end tag
1017                    ch=pXML->lpXML[pXML->nIndex];
1018                    // If we found a short hand end tag then we need to exit the loop
1019                    if (ch==_CXML('>')) { pXML->nIndex--; break; }
1020
1021                } else if ((ch==_CXML('<'))||(ch==_CXML('>'))||(ch==_CXML('=')))
1022                {
1023                    pXML->nIndex--; break;
1024                }
1025            }
1026        }
1027        *pcbToken = pXML->nIndex-indexStart;
1028    } else
1029    {
1030        // If we failed to obtain a valid character
1031        *pcbToken = 0;
1032        *pType = eTokenError;
1033        result.pStr=NULL;
1034    }
1035
1036    return result;
1037}
1038
1039XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName)
1040{
1041    if (!d) { free(lpszName); return NULL; }
1042    if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName);
1043    d->lpszName=lpszName;
1044    return lpszName;
1045}
1046
1047// private:
1048XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; }
1049XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration)
1050{
1051    d=(XMLNodeData*)malloc(sizeof(XMLNodeData));
1052    d->ref_count=1;
1053
1054    d->lpszName=NULL;
1055    d->nChild= 0;
1056    d->nText = 0;
1057    d->nClear = 0;
1058    d->nAttribute = 0;
1059
1060    d->isDeclaration = isDeclaration;
1061
1062    d->pParent = pParent;
1063    d->pChild= NULL;
1064    d->pText= NULL;
1065    d->pClear= NULL;
1066    d->pAttribute= NULL;
1067    d->pOrder= NULL;
1068
1069    updateName_WOSD(lpszName);
1070}
1071
1072XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); }
1073XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); }
1074
1075#define MEMORYINCREASE 50
1076
// free() that can be given a NULL pointer.
static inline void myFree(void *p)
{
    free(p); // the C library already ignores NULL
}
// Growth policy shared by all the arrays of a node.
//   p          : current block (NULL if nothing has been allocated yet)
//   newsize    : number of elements the array is about to contain
//   memInc     : growth step in elements; 0 means "resize on every call"
//   sizeofElem : size of one element in bytes
// A NULL block is allocated with room for memInc elements (one element when
// memInc is 0). An existing block is reallocated to newsize+memInc elements
// when memInc is 0 or newsize is a multiple of memInc; otherwise it is
// returned unchanged.
// The result of malloc()/realloc() is returned as is: on failure the caller
// receives NULL.
static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem)
{
    if (p==NULL)
    {
        const int firstCount=(memInc ? memInc : 1);
        return malloc(firstCount*sizeofElem);
    }

    const bool mustResize=(memInc==0)||((newsize%memInc)==0);
    if (mustResize) p=realloc(p,(newsize+memInc)*sizeofElem);
    return p;
}
1088
1089// private:
1090XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype)
1091{
1092    if (index<0) return -1;
1093    int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i;
1094}
1095
1096// private:
1097// update "order" information when deleting a content of a XMLNode
1098int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index)
1099{
1100    int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t);
1101    memmove(o+i, o+i+1, (n-i)*sizeof(int));
1102    for (;i<n;i++)
1103        if ((o[i]&3)==(int)t) o[i]-=4;
1104    // We should normally do:
1105    // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int));
1106    // but we skip reallocation because it's too time consuming.
1107    // Anyway, at the end, it will be free'd completely at once.
1108    return i;
1109}
1110
1111void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype)
1112{
1113    //  in: *_pos is the position inside d->pOrder ("-1" means "EndOf")
1114    // out: *_pos is the index inside p
1115    p=myRealloc(p,(nc+1),memoryIncrease,size);
1116    int n=d->nChild+d->nText+d->nClear;
1117    d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int));
1118    int pos=*_pos,*o=d->pOrder;
1119
1120    if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
1121
1122    int i=pos;
1123    memmove(o+i+1, o+i, (n-i)*sizeof(int));
1124
1125    while ((pos<n)&&((o[pos]&3)!=(int)xtype)) pos++;
1126    if (pos==n) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
1127
1128    o[i]=o[pos];
1129    for (i=pos+1;i<=n;i++) if ((o[i]&3)==(int)xtype) o[i]+=4;
1130
1131    *_pos=pos=o[pos]>>2;
1132    memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size);
1133
1134    return p;
1135}
1136
1137// Add a child node to the given element.
1138XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos)
1139{
1140    if (!lpszName) return emptyXMLNode;
1141    d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild);
1142    d->pChild[pos].d=NULL;
1143    d->pChild[pos]=XMLNode(d,lpszName,isDeclaration);
1144    d->nChild++;
1145    return d->pChild[pos];
1146}
1147
1148// Add an attribute to an element.
1149XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev)
1150{
1151    if (!lpszName) return &emptyXMLAttribute;
1152    if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; }
1153    int nc=d->nAttribute;
1154    d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute));
1155    XMLAttribute *pAttr=d->pAttribute+nc;
1156    pAttr->lpszName = lpszName;
1157    pAttr->lpszValue = lpszValuev;
1158    d->nAttribute++;
1159    return pAttr;
1160}
1161
1162// Add text to the element.
1163XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos)
1164{
1165    if (!lpszValue) return NULL;
1166    if (!d) { myFree(lpszValue); return NULL; }
1167    d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText);
1168    d->pText[pos]=lpszValue;
1169    d->nText++;
1170    return lpszValue;
1171}
1172
1173// Add clear (unformatted) text to the element.
1174XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos)
1175{
1176    if (!lpszValue) return &emptyXMLClear;
1177    if (!d) { myFree(lpszValue); return &emptyXMLClear; }
1178    d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear);
1179    XMLClear *pNewClear=d->pClear+pos;
1180    pNewClear->lpszValue = lpszValue;
1181    if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen;
1182    if (!lpszClose) lpszClose=XMLClearTags->lpszClose;
1183    pNewClear->lpszOpenTag = lpszOpen;
1184    pNewClear->lpszCloseTag = lpszClose;
1185    d->nClear++;
1186    return pNewClear;
1187}
1188
1189// private:
1190// Parse a clear (unformatted) type node.
1191char XMLNode::parseClearTag(void *px, void *_pClear)
1192{
1193    XML *pXML=(XML *)px;
1194    ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear);
1195    int cbTemp=0;
1196    XMLCSTR lpszTemp=NULL;
1197    XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex];
1198    static XMLCSTR docTypeEnd=_CXML("]>");
1199
1200    // Find the closing tag
1201    // Seems the <!DOCTYPE need a better treatment so lets handle it
1202    if (pClear.lpszOpen==XMLClearTags[1].lpszOpen)
1203    {
1204        XMLCSTR pCh=lpXML;
1205        while (*pCh)
1206        {
1207            if (*pCh==_CXML('<')) { pClear.lpszClose=docTypeEnd; lpszTemp=xstrstr(lpXML,docTypeEnd); break; }
1208            else if (*pCh==_CXML('>')) { lpszTemp=pCh; break; }
1209#ifdef _XMLWIDECHAR
1210            pCh++;
1211#else
1212            pCh+=XML_ByteTable[(unsigned char)(*pCh)];
1213#endif
1214        }
1215    } else lpszTemp=xstrstr(lpXML, pClear.lpszClose);
1216
1217    if (lpszTemp)
1218    {
1219        // Cache the size and increment the index
1220        cbTemp = (int)(lpszTemp - lpXML);
1221
1222        pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose);
1223
1224        // Add the clear node to the current element
1225        addClear_priv(MEMORYINCREASE,stringDup(lpXML,cbTemp), pClear.lpszOpen, pClear.lpszClose,-1);
1226        return 0;
1227    }
1228
1229    // If we failed to find the end tag
1230    pXML->error = eXMLErrorUnmatchedEndClearTag;
1231    return 1;
1232}
1233
1234void XMLNode::exactMemory(XMLNodeData *d)
1235{
1236    if (d->pOrder)     d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int));
1237    if (d->pChild)     d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode));
1238    if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute));
1239    if (d->pText)      d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR));
1240    if (d->pClear)     d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear));
1241}
1242
1243char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr)
1244{
1245    XML *pXML=(XML *)pa;
1246    XMLCSTR lpszText=pXML->lpszText;
1247    if (!lpszText) return 0;
1248    if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++;
1249    int cbText = (int)(tokenPStr - lpszText);
1250    if (!cbText) { pXML->lpszText=NULL; return 0; }
1251    if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; }
1252    if (!cbText) { pXML->lpszText=NULL; return 0; }
1253    XMLSTR lpt=fromXMLString(lpszText,cbText,pXML);
1254    if (!lpt) return 1;
1255    pXML->lpszText=NULL;
1256    if (removeCommentsInMiddleOfText && d->nText && d->nClear)
1257    {
1258        // if the previous insertion was a comment (<!-- -->) AND
1259        // if the previous previous insertion was a text then, delete the comment and append the text
1260        int n=d->nChild+d->nText+d->nClear-1,*o=d->pOrder;
1261        if (((o[n]&3)==eNodeClear)&&((o[n-1]&3)==eNodeText))
1262        {
1263            int i=o[n]>>2;
1264            if (d->pClear[i].lpszOpenTag==XMLClearTags[2].lpszOpen)
1265            {
1266                deleteClear(i);
1267                i=o[n-1]>>2;
1268                n=xstrlen(d->pText[i]);
1269                int n2=xstrlen(lpt)+1;
1270                d->pText[i]=(XMLSTR)realloc((void*)d->pText[i],(n+n2)*sizeof(XMLCHAR));
1271                if (!d->pText[i]) return 1;
1272                memcpy((void*)(d->pText[i]+n),lpt,n2*sizeof(XMLCHAR));
1273                free(lpt);
1274                return 0;
1275            }
1276        }
1277    }
1278    addText_priv(MEMORYINCREASE,lpt,-1);
1279    return 0;
1280}
1281// private:
1282// Recursively parse an XML element.
1283int XMLNode::ParseXMLElement(void *pa)
1284{
1285    XML *pXML=(XML *)pa;
1286    int cbToken;
1287    enum XMLTokenTypeTag xtype;
1288    NextToken token;
1289    XMLCSTR lpszTemp=NULL;
1290    int cbTemp=0;
1291    char nDeclaration;
1292    XMLNode pNew;
1293    enum Status status; // inside or outside a tag
1294    enum Attrib attrib = eAttribName;
1295
1296    assert(pXML);
1297
1298    // If this is the first call to the function
1299    if (pXML->nFirst)
1300    {
1301        // Assume we are outside of a tag definition
1302        pXML->nFirst = FALSE;
1303        status = eOutsideTag;
1304    } else
1305    {
1306        // If this is not the first call then we should only be called when inside a tag.
1307        status = eInsideTag;
1308    }
1309
1310    // Iterate through the tokens in the document
1311    for(;;)
1312    {
1313        // Obtain the next token
1314        token = GetNextToken(pXML, &cbToken, &xtype);
1315
1316        if (xtype != eTokenError)
1317        {
1318            // Check the current status
1319            switch(status)
1320            {
1321
1322            // If we are outside of a tag definition
1323            case eOutsideTag:
1324
1325                // Check what type of token we obtained
1326                switch(xtype)
1327                {
1328                // If we have found text or quoted text
1329                case eTokenText:
1330                case eTokenCloseTag:          /* '>'         */
1331                case eTokenShortHandClose:    /* '/>'        */
1332                case eTokenQuotedText:
1333                case eTokenEquals:
1334                    break;
1335
1336                // If we found a start tag '<' and declarations '<?'
1337                case eTokenTagStart:
1338                case eTokenDeclaration:
1339
1340                    // Cache whether this new element is a declaration or not
1341                    nDeclaration = (xtype == eTokenDeclaration);
1342
1343                    // If we have node text then add this to the element
1344                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;
1345
1346                    // Find the name of the tag
1347                    token = GetNextToken(pXML, &cbToken, &xtype);
1348
1349                    // Return an error if we couldn't obtain the next token or
1350                    // it wasnt text
1351                    if (xtype != eTokenText)
1352                    {
1353                        pXML->error = eXMLErrorMissingTagName;
1354                        return FALSE;
1355                    }
1356
1357                    // If we found a new element which is the same as this
1358                    // element then we need to pass this back to the caller..
1359
1360#ifdef APPROXIMATE_PARSING
1361                    if (d->lpszName &&
1362                        myTagCompare(d->lpszName, token.pStr) == 0)
1363                    {
1364                        // Indicate to the caller that it needs to create a
1365                        // new element.
1366                        pXML->lpNewElement = token.pStr;
1367                        pXML->cbNewElement = cbToken;
1368                        return TRUE;
1369                    } else
1370#endif
1371                    {
1372                        // If the name of the new element differs from the name of
1373                        // the current element we need to add the new element to
1374                        // the current one and recurse
1375                        pNew = addChild_priv(MEMORYINCREASE,stringDup(token.pStr,cbToken), nDeclaration,-1);
1376
1377                        while (!pNew.isEmpty())
1378                        {
1379                            // Callself to process the new node.  If we return
1380                            // FALSE this means we dont have any more
1381                            // processing to do...
1382
1383                            if (!pNew.ParseXMLElement(pXML)) return FALSE;
1384                            else
1385                            {
1386                                // If the call to recurse this function
1387                                // evented in a end tag specified in XML then
1388                                // we need to unwind the calls to this
1389                                // function until we find the appropriate node
1390                                // (the element name and end tag name must
1391                                // match)
1392                                if (pXML->cbEndTag)
1393                                {
1394                                    // If we are back at the root node then we
1395                                    // have an unmatched end tag
1396                                    if (!d->lpszName)
1397                                    {
1398                                        pXML->error=eXMLErrorUnmatchedEndTag;
1399                                        return FALSE;
1400                                    }
1401
1402                                    // If the end tag matches the name of this
1403                                    // element then we only need to unwind
1404                                    // once more...
1405
1406                                    if (myTagCompare(d->lpszName, pXML->lpEndTag)==0)
1407                                    {
1408                                        pXML->cbEndTag = 0;
1409                                    }
1410
1411                                    return TRUE;
1412                                } else
1413                                    if (pXML->cbNewElement)
1414                                    {
1415                                        // If the call indicated a new element is to
1416                                        // be created on THIS element.
1417
1418                                        // If the name of this element matches the
1419                                        // name of the element we need to create
1420                                        // then we need to return to the caller
1421                                        // and let it process the element.
1422
1423                                        if (myTagCompare(d->lpszName, pXML->lpNewElement)==0)
1424                                        {
1425                                            return TRUE;
1426                                        }
1427
1428                                        // Add the new element and recurse
1429                                        pNew = addChild_priv(MEMORYINCREASE,stringDup(pXML->lpNewElement,pXML->cbNewElement),0,-1);
1430                                        pXML->cbNewElement = 0;
1431                                    }
1432                                    else
1433                                    {
1434                                        // If we didn't have a new element to create
1435                                        pNew = emptyXMLNode;
1436
1437                                    }
1438                            }
1439                        }
1440                    }
1441                    break;
1442
1443                // If we found an end tag
1444                case eTokenTagEnd:
1445
1446                    // If we have node text then add this to the element
1447                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;
1448
1449                    // Find the name of the end tag
1450                    token = GetNextToken(pXML, &cbTemp, &xtype);
1451
1452                    // The end tag should be text
1453                    if (xtype != eTokenText)
1454                    {
1455                        pXML->error = eXMLErrorMissingEndTagName;
1456                        return FALSE;
1457                    }
1458                    lpszTemp = token.pStr;
1459
1460                    // After the end tag we should find a closing tag
1461                    token = GetNextToken(pXML, &cbToken, &xtype);
1462                    if (xtype != eTokenCloseTag)
1463                    {
1464                        pXML->error = eXMLErrorMissingEndTagName;
1465                        return FALSE;
1466                    }
1467                    pXML->lpszText=pXML->lpXML+pXML->nIndex;
1468
1469                    // We need to return to the previous caller.  If the name
1470                    // of the tag cannot be found we need to keep returning to
1471                    // caller until we find a match
1472                    if (myTagCompare(d->lpszName, lpszTemp) != 0)
1473#ifdef STRICT_PARSING
1474                    {
1475                        pXML->error=eXMLErrorUnmatchedEndTag;
1476                        pXML->nIndexMissigEndTag=pXML->nIndex;
1477                        return FALSE;
1478                    }
1479#else
1480                    {
1481                        pXML->error=eXMLErrorMissingEndTag;
1482                        pXML->nIndexMissigEndTag=pXML->nIndex;
1483                        pXML->lpEndTag = lpszTemp;
1484                        pXML->cbEndTag = cbTemp;
1485                    }
1486#endif
1487
1488                    // Return to the caller
1489                    exactMemory(d);
1490                    return TRUE;
1491
1492                // If we found a clear (unformatted) token
1493                case eTokenClear:
1494                    // If we have node text then add this to the element
1495                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;
1496                    if (parseClearTag(pXML, token.pClr)) return FALSE;
1497                    pXML->lpszText=pXML->lpXML+pXML->nIndex;
1498                    break;
1499
1500                default:
1501                    break;
1502                }
1503                break;
1504
1505            // If we are inside a tag definition we need to search for attributes
1506            case eInsideTag:
1507
1508                // Check what part of the attribute (name, equals, value) we
1509                // are looking for.
1510                switch(attrib)
1511                {
1512                // If we are looking for a new attribute
1513                case eAttribName:
1514
1515                    // Check what the current token type is
1516                    switch(xtype)
1517                    {
1518                    // If the current type is text...
1519                    // Eg.  'attribute'
1520                    case eTokenText:
1521                        // Cache the token then indicate that we are next to
1522                        // look for the equals
1523                        lpszTemp = token.pStr;
1524                        cbTemp = cbToken;
1525                        attrib = eAttribEquals;
1526                        break;
1527
1528                    // If we found a closing tag...
1529                    // Eg.  '>'
1530                    case eTokenCloseTag:
1531                        // We are now outside the tag
1532                        status = eOutsideTag;
1533                        pXML->lpszText=pXML->lpXML+pXML->nIndex;
1534                        break;
1535
1536                    // If we found a short hand '/>' closing tag then we can
1537                    // return to the caller
1538                    case eTokenShortHandClose:
1539                        exactMemory(d);
1540                        pXML->lpszText=pXML->lpXML+pXML->nIndex;
1541                        return TRUE;
1542
1543                    // Errors...
1544                    case eTokenQuotedText:    /* '"SomeText"'   */
1545                    case eTokenTagStart:      /* '<'            */
1546                    case eTokenTagEnd:        /* '</'           */
1547                    case eTokenEquals:        /* '='            */
1548                    case eTokenDeclaration:   /* '<?'           */
1549                    case eTokenClear:
1550                        pXML->error = eXMLErrorUnexpectedToken;
1551                        return FALSE;
1552                    default: break;
1553                    }
1554                    break;
1555
1556                // If we are looking for an equals
1557                case eAttribEquals:
1558                    // Check what the current token type is
1559                    switch(xtype)
1560                    {
1561                    // If the current type is text...
1562                    // Eg.  'Attribute AnotherAttribute'
1563                    case eTokenText:
1564                        // Add the unvalued attribute to the list
1565                        addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL);
1566                        // Cache the token then indicate.  We are next to
1567                        // look for the equals attribute
1568                        lpszTemp = token.pStr;
1569                        cbTemp = cbToken;
1570                        break;
1571
1572                    // If we found a closing tag 'Attribute >' or a short hand
1573                    // closing tag 'Attribute />'
1574                    case eTokenShortHandClose:
1575                    case eTokenCloseTag:
1576                        // If we are a declaration element '<?' then we need
1577                        // to remove extra closing '?' if it exists
1578                        pXML->lpszText=pXML->lpXML+pXML->nIndex;
1579
1580                        if (d->isDeclaration &&
1581                            (lpszTemp[cbTemp-1]) == _CXML('?'))
1582                        {
1583                            cbTemp--;
1584                            if (d->pParent && d->pParent->pParent) xtype = eTokenShortHandClose;
1585                        }
1586
1587                        if (cbTemp)
1588                        {
1589                            // Add the unvalued attribute to the list
1590                            addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL);
1591                        }
1592
1593                        // If this is the end of the tag then return to the caller
1594                        if (xtype == eTokenShortHandClose)
1595                        {
1596                            exactMemory(d);
1597                            return TRUE;
1598                        }
1599
1600                        // We are now outside the tag
1601                        status = eOutsideTag;
1602                        break;
1603
1604                    // If we found the equals token...
1605                    // Eg.  'Attribute ='
1606                    case eTokenEquals:
1607                        // Indicate that we next need to search for the value
1608                        // for the attribute
1609                        attrib = eAttribValue;
1610                        break;
1611
1612                    // Errors...
1613                    case eTokenQuotedText:    /* 'Attribute "InvalidAttr"'*/
1614                    case eTokenTagStart:      /* 'Attribute <'            */
1615                    case eTokenTagEnd:        /* 'Attribute </'           */
1616                    case eTokenDeclaration:   /* 'Attribute <?'           */
1617                    case eTokenClear:
1618                        pXML->error = eXMLErrorUnexpectedToken;
1619                        return FALSE;
1620                    default: break;
1621                    }
1622                    break;
1623
1624                // If we are looking for an attribute value
1625                case eAttribValue:
1626                    // Check what the current token type is
1627                    switch(xtype)
1628                    {
1629                    // If the current type is text or quoted text...
1630                    // Eg.  'Attribute = "Value"' or 'Attribute = Value' or
1631                    // 'Attribute = 'Value''.
1632                    case eTokenText:
1633                    case eTokenQuotedText:
1634                        // If we are a declaration element '<?' then we need
1635                        // to remove extra closing '?' if it exists
1636                        if (d->isDeclaration &&
1637                            (token.pStr[cbToken-1]) == _CXML('?'))
1638                        {
1639                            cbToken--;
1640                        }
1641
1642                        if (cbTemp)
1643                        {
1644                            // Add the valued attribute to the list
1645                            if (xtype==eTokenQuotedText) { token.pStr++; cbToken-=2; }
1646                            XMLSTR attrVal=(XMLSTR)token.pStr;
1647                            if (attrVal)
1648                            {
1649                                attrVal=fromXMLString(attrVal,cbToken,pXML);
1650                                if (!attrVal) return FALSE;
1651                            }
1652                            addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp),attrVal);
1653                        }
1654
1655                        // Indicate we are searching for a new attribute
1656                        attrib = eAttribName;
1657                        break;
1658
1659                    // Errors...
1660                    case eTokenTagStart:        /* 'Attr = <'          */
1661                    case eTokenTagEnd:          /* 'Attr = </'         */
1662                    case eTokenCloseTag:        /* 'Attr = >'          */
1663                    case eTokenShortHandClose:  /* "Attr = />"         */
1664                    case eTokenEquals:          /* 'Attr = ='          */
1665                    case eTokenDeclaration:     /* 'Attr = <?'         */
1666                    case eTokenClear:
1667                        pXML->error = eXMLErrorUnexpectedToken;
1668                        return FALSE;
1669                        break;
1670                    default: break;
1671                    }
1672                }
1673            }
1674        }
1675        // If we failed to obtain the next token
1676        else
1677        {
1678            if ((!d->isDeclaration)&&(d->pParent))
1679            {
1680#ifdef STRICT_PARSING
1681                pXML->error=eXMLErrorUnmatchedEndTag;
1682#else
1683                pXML->error=eXMLErrorMissingEndTag;
1684#endif
1685                pXML->nIndexMissigEndTag=pXML->nIndex;
1686            }
1687            maybeAddTxT(pXML,pXML->lpXML+pXML->nIndex);
1688            return FALSE;
1689        }
1690    }
1691}
1692
1693// Count the number of lines and columns in an XML string.
1694static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, XMLResults *pResults)
1695{
1696    XMLCHAR ch;
1697    assert(lpXML);
1698    assert(pResults);
1699
1700    struct XML xml={ lpXML,lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE };
1701
1702    pResults->nLine = 1;
1703    pResults->nColumn = 1;
1704    while (xml.nIndex<nUpto)
1705    {
1706        ch = getNextChar(&xml);
1707        if (ch != _CXML('\n')) pResults->nColumn++;
1708        else
1709        {
1710            pResults->nLine++;
1711            pResults->nColumn=1;
1712        }
1713    }
1714}
1715
1716// Parse XML and return the root element.
1717XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults)
1718{
1719    if (!lpszXML)
1720    {
1721        if (pResults)
1722        {
1723            pResults->error=eXMLErrorNoElements;
1724            pResults->nLine=0;
1725            pResults->nColumn=0;
1726        }
1727        return emptyXMLNode;
1728    }
1729
1730    XMLNode xnode(NULL,NULL,FALSE);
1731    struct XML xml={ lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE };
1732
1733    // Create header element
1734    xnode.ParseXMLElement(&xml);
1735    enum XMLError error = xml.error;
1736    if (!xnode.nChildNode()) error=eXMLErrorNoXMLTagFound;
1737    if ((xnode.nChildNode()==1)&&(xnode.nElement()==1)) xnode=xnode.getChildNode(); // skip the empty node
1738
1739    // If no error occurred
1740    if ((error==eXMLErrorNone)||(error==eXMLErrorMissingEndTag)||(error==eXMLErrorNoXMLTagFound))
1741    {
1742        XMLCSTR name=xnode.getName();
1743        if (tag&&(*tag)&&((!name)||(xstricmp(name,tag))))
1744        {
1745            xnode=xnode.getChildNode(tag);
1746            if (xnode.isEmpty())
1747            {
1748                if (pResults)
1749                {
1750                    pResults->error=eXMLErrorFirstTagNotFound;
1751                    pResults->nLine=0;
1752                    pResults->nColumn=0;
1753                }
1754                return emptyXMLNode;
1755            }
1756        }
1757    } else
1758    {
1759        // Cleanup: this will destroy all the nodes
1760        xnode = emptyXMLNode;
1761    }
1762
1763
1764    // If we have been given somewhere to place results
1765    if (pResults)
1766    {
1767        pResults->error = error;
1768
1769        // If we have an error
1770        if (error!=eXMLErrorNone)
1771        {
1772            if (error==eXMLErrorMissingEndTag) xml.nIndex=xml.nIndexMissigEndTag;
1773            // Find which line and column it starts on.
1774            CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults);
1775        }
1776    }
1777    return xnode;
1778}
1779
1780XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults)
1781{
1782    if (pResults) { pResults->nLine=0; pResults->nColumn=0; }
1783    FILE *f=xfopen(filename,_CXML("rb"));
1784    if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; }
1785    fseek(f,0,SEEK_END);
1786    int l=ftell(f),headerSz=0;
1787    if (!l) { if (pResults) pResults->error=eXMLErrorEmpty; fclose(f); return emptyXMLNode; }
1788    fseek(f,0,SEEK_SET);
1789    unsigned char *buf=(unsigned char*)malloc(l+4);
1790    l=fread(buf,1,l,f);
1791    fclose(f);
1792    buf[l]=0;buf[l+1]=0;buf[l+2]=0;buf[l+3]=0;
1793#ifdef _XMLWIDECHAR
1794    if (guessWideCharChars)
1795    {
1796        if (!myIsTextWideChar(buf,l))
1797        {
1798            XMLNode::XMLCharEncoding ce=XMLNode::char_encoding_legacy;
1799            if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) { headerSz=3; ce=XMLNode::char_encoding_UTF8; }
1800            XMLSTR b2=myMultiByteToWideChar((const char*)(buf+headerSz),ce);
1801            free(buf); buf=(unsigned char*)b2; headerSz=0;
1802        } else
1803        {
1804            if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2;
1805            if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2;
1806        }
1807    }
1808#else
1809    if (guessWideCharChars)
1810    {
1811        if (myIsTextWideChar(buf,l))
1812        {
1813            if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2;
1814            if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2;
1815            char *b2=myWideCharToMultiByte((const wchar_t*)(buf+headerSz));
1816            free(buf); buf=(unsigned char*)b2; headerSz=0;
1817        } else
1818        {
1819            if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3;
1820        }
1821    }
1822#endif
1823
1824    if (!buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; }
1825    XMLNode x=parseString((XMLSTR)(buf+headerSz),tag,pResults);
1826    free(buf);
1827    return x;
1828}
1829
1830static inline void charmemset(XMLSTR dest,XMLCHAR c,int l) { while (l--) *(dest++)=c; }
1831// private:
1832// Creates an user friendly XML string from a given element with
1833// appropriate white space and carriage returns.
1834//
1835// This recurses through all subnodes then adds contents of the nodes to the
1836// string.
1837int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat)
1838{
1839    int nResult = 0;
1840    int cb=nFormat<0?0:nFormat;
1841    int cbElement;
1842    int nChildFormat=-1;
1843    int nElementI=pEntry->nChild+pEntry->nText+pEntry->nClear;
1844    int i,j;
1845    if ((nFormat>=0)&&(nElementI==1)&&(pEntry->nText==1)&&(!pEntry->isDeclaration)) nFormat=-2;
1846
1847    assert(pEntry);
1848
1849#define LENSTR(lpsz) (lpsz ? xstrlen(lpsz) : 0)
1850
1851    // If the element has no name then assume this is the head node.
1852    cbElement = (int)LENSTR(pEntry->lpszName);
1853
1854    if (cbElement)
1855    {
1856        // "<elementname "
1857        if (lpszMarker)
1858        {
1859            if (cb) charmemset(lpszMarker, INDENTCHAR, cb);
1860            nResult = cb;
1861            lpszMarker[nResult++]=_CXML('<');
1862            if (pEntry->isDeclaration) lpszMarker[nResult++]=_CXML('?');
1863            xstrcpy(&lpszMarker[nResult], pEntry->lpszName);
1864            nResult+=cbElement;
1865            lpszMarker[nResult++]=_CXML(' ');
1866
1867        } else
1868        {
1869            nResult+=cbElement+2+cb;
1870            if (pEntry->isDeclaration) nResult++;
1871        }
1872
1873        // Enumerate attributes and add them to the string
1874        XMLAttribute *pAttr=pEntry->pAttribute;
1875        for (i=0; i<pEntry->nAttribute; i++)
1876        {
1877            // "Attrib
1878            cb = (int)LENSTR(pAttr->lpszName);
1879            if (cb)
1880            {
1881                if (lpszMarker) xstrcpy(&lpszMarker[nResult], pAttr->lpszName);
1882                nResult += cb;
1883                // "Attrib=Value "
1884                if (pAttr->lpszValue)
1885                {
1886                    cb=(int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue);
1887                    if (lpszMarker)
1888                    {
1889                        lpszMarker[nResult]=_CXML('=');
1890                        lpszMarker[nResult+1]=_CXML('"');
1891                        if (cb) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2],pAttr->lpszValue);
1892                        lpszMarker[nResult+cb+2]=_CXML('"');
1893                    }
1894                    nResult+=cb+3;
1895                }
1896                if (lpszMarker) lpszMarker[nResult] = _CXML(' ');
1897                nResult++;
1898            }
1899            pAttr++;
1900        }
1901
1902        if (pEntry->isDeclaration)
1903        {
1904            if (lpszMarker)
1905            {
1906                lpszMarker[nResult-1]=_CXML('?');
1907                lpszMarker[nResult]=_CXML('>');
1908            }
1909            nResult++;
1910            if (nFormat!=-1)
1911            {
1912                if (lpszMarker) lpszMarker[nResult]=_CXML('\n');
1913                nResult++;
1914            }
1915        } else
1916            // If there are child nodes we need to terminate the start tag
1917            if (nElementI)
1918            {
1919                if (lpszMarker) lpszMarker[nResult-1]=_CXML('>');
1920                if (nFormat>=0)
1921                {
1922                    if (lpszMarker) lpszMarker[nResult]=_CXML('\n');
1923                    nResult++;
1924                }
1925            } else nResult--;
1926    }
1927
1928    // Calculate the child format for when we recurse.  This is used to
1929    // determine the number of spaces used for prefixes.
1930    if (nFormat!=-1)
1931    {
1932        if (cbElement&&(!pEntry->isDeclaration)) nChildFormat=nFormat+1;
1933        else nChildFormat=nFormat;
1934    }
1935
1936    // Enumerate through remaining children
1937    for (i=0; i<nElementI; i++)
1938    {
1939        j=pEntry->pOrder[i];
1940        switch((XMLElementType)(j&3))
1941        {
1942        // Text nodes
1943        case eNodeText:
1944            {
1945                // "Text"
1946                XMLCSTR pChild=pEntry->pText[j>>2];
1947                cb = (int)ToXMLStringTool::lengthXMLString(pChild);
1948                if (cb)
1949                {
1950                    if (nFormat>=0)
1951                    {
1952                        if (lpszMarker)
1953                        {
1954                            charmemset(&lpszMarker[nResult],INDENTCHAR,nFormat+1);
1955                            ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+nFormat+1],pChild);
1956                            lpszMarker[nResult+nFormat+1+cb]=_CXML('\n');
1957                        }
1958                        nResult+=cb+nFormat+2;
1959                    } else
1960                    {
1961                        if (lpszMarker) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], pChild);
1962                        nResult += cb;
1963                    }
1964                }
1965                break;
1966            }
1967
1968        // Clear type nodes
1969        case eNodeClear:
1970            {
1971                XMLClear *pChild=pEntry->pClear+(j>>2);
1972                // "OpenTag"
1973                cb = (int)LENSTR(pChild->lpszOpenTag);
1974                if (cb)
1975                {
1976                    if (nFormat!=-1)
1977                    {
1978                        if (lpszMarker)
1979                        {
1980                            charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat+1);
1981                            xstrcpy(&lpszMarker[nResult+nFormat+1], pChild->lpszOpenTag);
1982                        }
1983                        nResult+=cb+nFormat+1;
1984                    }
1985                    else
1986                    {
1987                        if (lpszMarker)xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag);
1988                        nResult += cb;
1989                    }
1990                }
1991
1992                // "OpenTag Value"
1993                cb = (int)LENSTR(pChild->lpszValue);
1994                if (cb)
1995                {
1996                    if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszValue);
1997                    nResult += cb;
1998                }
1999
2000                // "OpenTag Value CloseTag"
2001                cb = (int)LENSTR(pChild->lpszCloseTag);
2002                if (cb)
2003                {
2004                    if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszCloseTag);
2005                    nResult += cb;
2006                }
2007
2008                if (nFormat!=-1)
2009                {
2010                    if (lpszMarker) lpszMarker[nResult] = _CXML('\n');
2011                    nResult++;
2012                }
2013                break;
2014            }
2015
2016        // Element nodes
2017        case eNodeChild:
2018            {
2019                // Recursively add child nodes
2020                nResult += CreateXMLStringR(pEntry->pChild[j>>2].d, lpszMarker ? lpszMarker + nResult : 0, nChildFormat);
2021                break;
2022            }
2023        default: break;
2024        }
2025    }
2026
2027    if ((cbElement)&&(!pEntry->isDeclaration))
2028    {
2029        // If we have child entries we need to use long XML notation for
2030        // closing the element - "<elementname>blah blah blah</elementname>"
2031        if (nElementI)
2032        {
2033            // "</elementname>\0"
2034            if (lpszMarker)
2035            {
2036                if (nFormat >=0)
2037                {
2038                    charmemset(&lpszMarker[nResult], INDENTCHAR,nFormat);
2039                    nResult+=nFormat;
2040                }
2041
2042                lpszMarker[nResult]=_CXML('<'); lpszMarker[nResult+1]=_CXML('/');
2043                nResult += 2;
2044                xstrcpy(&lpszMarker[nResult], pEntry->lpszName);
2045                nResult += cbElement;
2046
2047                lpszMarker[nResult]=_CXML('>');
2048                if (nFormat == -1) nResult++;
2049                else
2050                {
2051                    lpszMarker[nResult+1]=_CXML('\n');
2052                    nResult+=2;
2053                }
2054            } else
2055            {
2056                if (nFormat>=0) nResult+=cbElement+4+nFormat;
2057                else if (nFormat==-1) nResult+=cbElement+3;
2058                else nResult+=cbElement+4;
2059            }
2060        } else
2061        {
2062            // If there are no children we can use shorthand XML notation -
2063            // "<elementname/>"
2064            // "/>\0"
2065            if (lpszMarker)
2066            {
2067                lpszMarker[nResult]=_CXML('/'); lpszMarker[nResult+1]=_CXML('>');
2068                if (nFormat != -1) lpszMarker[nResult+2]=_CXML('\n');
2069            }
2070            nResult += nFormat == -1 ? 2 : 3;
2071        }
2072    }
2073
2074    return nResult;
2075}
2076
2077#undef LENSTR
2078
2079// Create an XML string
2080// @param       int nFormat             - 0 if no formatting is required
2081//                                        otherwise nonzero for formatted text
2082//                                        with carriage returns and indentation.
2083// @param       int *pnSize             - [out] pointer to the size of the
2084//                                        returned string not including the
2085//                                        NULL terminator.
2086// @return      XMLSTR                  - Allocated XML string, you must free
2087//                                        this with free().
2088XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const
2089{
2090    if (!d) { if (pnSize) *pnSize=0; return NULL; }
2091
2092    XMLSTR lpszResult = NULL;
2093    int cbStr;
2094
2095    // Recursively Calculate the size of the XML string
2096    if (!dropWhiteSpace) nFormat=0;
2097    nFormat = nFormat ? 0 : -1;
2098    cbStr = CreateXMLStringR(d, 0, nFormat);
2099    // Alllocate memory for the XML string + the NULL terminator and
2100    // create the recursively XML string.
2101    lpszResult=(XMLSTR)malloc((cbStr+1)*sizeof(XMLCHAR));
2102    CreateXMLStringR(d, lpszResult, nFormat);
2103    lpszResult[cbStr]=_CXML('\0');
2104    if (pnSize) *pnSize = cbStr;
2105    return lpszResult;
2106}
2107
2108int XMLNode::detachFromParent(XMLNodeData *d)
2109{
2110    XMLNode *pa=d->pParent->pChild;
2111    int i=0;
2112    while (((void*)(pa[i].d))!=((void*)d)) i++;
2113    d->pParent->nChild--;
2114    if (d->pParent->nChild) memmove(pa+i,pa+i+1,(d->pParent->nChild-i)*sizeof(XMLNode));
2115    else { free(pa); d->pParent->pChild=NULL; }
2116    return removeOrderElement(d->pParent,eNodeChild,i);
2117}
2118
2119XMLNode::~XMLNode()
2120{
2121    if (!d) return;
2122    d->ref_count--;
2123    emptyTheNode(0);
2124}
2125void XMLNode::deleteNodeContent()
2126{
2127    if (!d) return;
2128    if (d->pParent) { detachFromParent(d); d->pParent=NULL; d->ref_count--; }
2129    emptyTheNode(1);
2130}
2131void XMLNode::emptyTheNode(char force)
2132{
2133    XMLNodeData *dd=d; // warning: must stay this way!
2134    if ((dd->ref_count==0)||force)
2135    {
2136        if (d->pParent) detachFromParent(d);
2137        int i;
2138        XMLNode *pc;
2139        for(i=0; i<dd->nChild; i++)
2140        {
2141            pc=dd->pChild+i;
2142            pc->d->pParent=NULL;
2143            pc->d->ref_count--;
2144            pc->emptyTheNode(force);
2145        }
2146        myFree(dd->pChild);
2147        for(i=0; i<dd->nText; i++) free((void*)dd->pText[i]);
2148        myFree(dd->pText);
2149        for(i=0; i<dd->nClear; i++) free((void*)dd->pClear[i].lpszValue);
2150        myFree(dd->pClear);
2151        for(i=0; i<dd->nAttribute; i++)
2152        {
2153            free((void*)dd->pAttribute[i].lpszName);
2154            if (dd->pAttribute[i].lpszValue) free((void*)dd->pAttribute[i].lpszValue);
2155        }
2156        myFree(dd->pAttribute);
2157        myFree(dd->pOrder);
2158        myFree((void*)dd->lpszName);
2159        dd->nChild=0;    dd->nText=0;    dd->nClear=0;    dd->nAttribute=0;
2160        dd->pChild=NULL; dd->pText=NULL; dd->pClear=NULL; dd->pAttribute=NULL;
2161        dd->pOrder=NULL; dd->lpszName=NULL; dd->pParent=NULL;
2162    }
2163    if (dd->ref_count==0)
2164    {
2165        free(dd);
2166        d=NULL;
2167    }
2168}
2169
2170XMLNode& XMLNode::operator=( const XMLNode& A )
2171{
2172    // shallow copy
2173    if (this != &A)
2174    {
2175        if (d) { d->ref_count--; emptyTheNode(0); }
2176        d=A.d;
2177        if (d) (d->ref_count) ++ ;
2178    }
2179    return *this;
2180}
2181
2182XMLNode::XMLNode(const XMLNode &A)
2183{
2184    // shallow copy
2185    d=A.d;
2186    if (d) (d->ref_count)++ ;
2187}
2188
2189XMLNode XMLNode::deepCopy() const
2190{
2191    if (!d) return XMLNode::emptyXMLNode;
2192    XMLNode x(NULL,stringDup(d->lpszName),d->isDeclaration);
2193    XMLNodeData *p=x.d;
2194    int n=d->nAttribute;
2195    if (n)
2196    {
2197        p->nAttribute=n; p->pAttribute=(XMLAttribute*)malloc(n*sizeof(XMLAttribute));
2198        while (n--)
2199        {
2200            p->pAttribute[n].lpszName=stringDup(d->pAttribute[n].lpszName);
2201            p->pAttribute[n].lpszValue=stringDup(d->pAttribute[n].lpszValue);
2202        }
2203    }
2204    if (d->pOrder)
2205    {
2206        n=(d->nChild+d->nText+d->nClear)*sizeof(int); p->pOrder=(int*)malloc(n); memcpy(p->pOrder,d->pOrder,n);
2207    }
2208    n=d->nText;
2209    if (n)
2210    {
2211        p->nText=n; p->pText=(XMLCSTR*)malloc(n*sizeof(XMLCSTR));
2212        while(n--) p->pText[n]=stringDup(d->pText[n]);
2213    }
2214    n=d->nClear;
2215    if (n)
2216    {
2217        p->nClear=n; p->pClear=(XMLClear*)malloc(n*sizeof(XMLClear));
2218        while (n--)
2219        {
2220            p->pClear[n].lpszCloseTag=d->pClear[n].lpszCloseTag;
2221            p->pClear[n].lpszOpenTag=d->pClear[n].lpszOpenTag;
2222            p->pClear[n].lpszValue=stringDup(d->pClear[n].lpszValue);
2223        }
2224    }
2225    n=d->nChild;
2226    if (n)
2227    {
2228        p->nChild=n; p->pChild=(XMLNode*)malloc(n*sizeof(XMLNode));
2229        while (n--)
2230        {
2231            p->pChild[n].d=NULL;
2232            p->pChild[n]=d->pChild[n].deepCopy();
2233            p->pChild[n].d->pParent=p;
2234        }
2235    }
2236    return x;
2237}
2238
2239XMLNode XMLNode::addChild(XMLNode childNode, int pos)
2240{
2241    XMLNodeData *dc=childNode.d;
2242    if ((!dc)||(!d)) return childNode;
2243    if (!dc->lpszName)
2244    {
2245        // this is a root node: todo: correct fix
2246        int j=pos;
2247        while (dc->nChild)
2248        {
2249            addChild(dc->pChild[0],j);
2250            if (pos>=0) j++;
2251        }
2252        return childNode;
2253    }
2254    if (dc->pParent) { if ((detachFromParent(dc)<=pos)&&(dc->pParent==d)) pos--; } else dc->ref_count++;
2255    dc->pParent=d;
2256//     int nc=d->nChild;
2257//     d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode));
2258    d->pChild=(XMLNode*)addToOrder(0,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild);
2259    d->pChild[pos].d=dc;
2260    d->nChild++;
2261    return childNode;
2262}
2263
2264void XMLNode::deleteAttribute(int i)
2265{
2266    if ((!d)||(i<0)||(i>=d->nAttribute)) return;
2267    d->nAttribute--;
2268    XMLAttribute *p=d->pAttribute+i;
2269    free((void*)p->lpszName);
2270    if (p->lpszValue) free((void*)p->lpszValue);
2271    if (d->nAttribute) memmove(p,p+1,(d->nAttribute-i)*sizeof(XMLAttribute)); else { free(p); d->pAttribute=NULL; }
2272}
2273
2274void XMLNode::deleteAttribute(XMLAttribute *a){ if (a) deleteAttribute(a->lpszName); }
2275void XMLNode::deleteAttribute(XMLCSTR lpszName)
2276{
2277    int j=0;
2278    getAttribute(lpszName,&j);
2279    if (j) deleteAttribute(j-1);
2280}
2281
2282XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,int i)
2283{
2284    if (!d) { if (lpszNewValue) free(lpszNewValue); if (lpszNewName) free(lpszNewName); return NULL; }
2285    if (i>=d->nAttribute)
2286    {
2287        if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue);
2288        return NULL;
2289    }
2290    XMLAttribute *p=d->pAttribute+i;
2291    if (p->lpszValue&&p->lpszValue!=lpszNewValue) free((void*)p->lpszValue);
2292    p->lpszValue=lpszNewValue;
2293    if (lpszNewName&&p->lpszName!=lpszNewName) { free((void*)p->lpszName); p->lpszName=lpszNewName; };
2294    return p;
2295}
2296
2297XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute)
2298{
2299    if (oldAttribute) return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue,(XMLSTR)newAttribute->lpszName,oldAttribute->lpszName);
2300    return addAttribute_WOSD((XMLSTR)newAttribute->lpszName,(XMLSTR)newAttribute->lpszValue);
2301}
2302
2303XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName)
2304{
2305    int j=0;
2306    getAttribute(lpszOldName,&j);
2307    if (j) return updateAttribute_WOSD(lpszNewValue,lpszNewName,j-1);
2308    else
2309    {
2310        if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue);
2311        else             return addAttribute_WOSD(stringDup(lpszOldName),lpszNewValue);
2312    }
2313}
2314
2315int XMLNode::indexText(XMLCSTR lpszValue) const
2316{
2317    if (!d) return -1;
2318    int i,l=d->nText;
2319    if (!lpszValue) { if (l) return 0; return -1; }
2320    XMLCSTR *p=d->pText;
2321    for (i=0; i<l; i++) if (lpszValue==p[i]) return i;
2322    return -1;
2323}
2324
2325void XMLNode::deleteText(int i)
2326{
2327    if ((!d)||(i<0)||(i>=d->nText)) return;
2328    d->nText--;
2329    XMLCSTR *p=d->pText+i;
2330    free((void*)*p);
2331    if (d->nText) memmove(p,p+1,(d->nText-i)*sizeof(XMLCSTR)); else { free(p); d->pText=NULL; }
2332    removeOrderElement(d,eNodeText,i);
2333}
2334
2335void XMLNode::deleteText(XMLCSTR lpszValue) { deleteText(indexText(lpszValue)); }
2336
2337XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i)
2338{
2339    if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; }
2340    if (i>=d->nText) return addText_WOSD(lpszNewValue);
2341    XMLCSTR *p=d->pText+i;
2342    if (*p!=lpszNewValue) { free((void*)*p); *p=lpszNewValue; }
2343    return lpszNewValue;
2344}
2345
2346XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue)
2347{
2348    if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; }
2349    int i=indexText(lpszOldValue);
2350    if (i>=0) return updateText_WOSD(lpszNewValue,i);
2351    return addText_WOSD(lpszNewValue);
2352}
2353
2354void XMLNode::deleteClear(int i)
2355{
2356    if ((!d)||(i<0)||(i>=d->nClear)) return;
2357    d->nClear--;
2358    XMLClear *p=d->pClear+i;
2359    free((void*)p->lpszValue);
2360    if (d->nClear) memmove(p,p+1,(d->nClear-i)*sizeof(XMLClear)); else { free(p); d->pClear=NULL; }
2361    removeOrderElement(d,eNodeClear,i);
2362}
2363
2364int XMLNode::indexClear(XMLCSTR lpszValue) const
2365{
2366    if (!d) return -1;
2367    int i,l=d->nClear;
2368    if (!lpszValue) { if (l) return 0; return -1; }
2369    XMLClear *p=d->pClear;
2370    for (i=0; i<l; i++) if (lpszValue==p[i].lpszValue) return i;
2371    return -1;
2372}
2373
2374void XMLNode::deleteClear(XMLCSTR lpszValue) { deleteClear(indexClear(lpszValue)); }
2375void XMLNode::deleteClear(XMLClear *a) { if (a) deleteClear(a->lpszValue); }
2376
2377XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, int i)
2378{
2379    if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; }
2380    if (i>=d->nClear) return addClear_WOSD(lpszNewContent);
2381    XMLClear *p=d->pClear+i;
2382    if (lpszNewContent!=p->lpszValue) { free((void*)p->lpszValue); p->lpszValue=lpszNewContent; }
2383    return p;
2384}
2385
2386XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, XMLCSTR lpszOldValue)
2387{
2388    if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; }
2389    int i=indexClear(lpszOldValue);
2390    if (i>=0) return updateClear_WOSD(lpszNewContent,i);
2391    return addClear_WOSD(lpszNewContent);
2392}
2393
2394XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP,XMLClear *oldP)
2395{
2396    if (oldP) return updateClear_WOSD((XMLSTR)newP->lpszValue,(XMLSTR)oldP->lpszValue);
2397    return NULL;
2398}
2399
2400int XMLNode::nChildNode(XMLCSTR name) const
2401{
2402    if (!d) return 0;
2403    int i,j=0,n=d->nChild;
2404    XMLNode *pc=d->pChild;
2405    for (i=0; i<n; i++)
2406    {
2407        if (xstricmp(pc->d->lpszName, name)==0) j++;
2408        pc++;
2409    }
2410    return j;
2411}
2412
2413XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const
2414{
2415    if (!d) return emptyXMLNode;
2416    int i=0,n=d->nChild;
2417    if (j) i=*j;
2418    XMLNode *pc=d->pChild+i;
2419    for (; i<n; i++)
2420    {
2421        if (!xstricmp(pc->d->lpszName, name))
2422        {
2423            if (j) *j=i+1;
2424            return *pc;
2425        }
2426        pc++;
2427    }
2428    return emptyXMLNode;
2429}
2430
2431XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const
2432{
2433    if (!d) return emptyXMLNode;
2434    if (j>=0)
2435    {
2436        int i=0;
2437        while (j-->0) getChildNode(name,&i);
2438        return getChildNode(name,&i);
2439    }
2440    int i=d->nChild;
2441    while (i--) if (!xstricmp(name,d->pChild[i].d->lpszName)) break;
2442    if (i<0) return emptyXMLNode;
2443    return getChildNode(i);
2444}
2445
2446XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, XMLCHAR sep)
2447{
2448    XMLSTR path=stringDup(_path);
2449    XMLNode x=getChildNodeByPathNonConst(path,createMissing,sep);
2450    if (path) free(path);
2451    return x;
2452}
2453
2454XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, char createIfMissing, XMLCHAR sep)
2455{
2456    if ((!path)||(!(*path))) return *this;
2457    XMLNode xn,xbase=*this;
2458    XMLCHAR *tend1,sepString[2]; sepString[0]=sep; sepString[1]=0;
2459    tend1=xstrstr(path,sepString);
2460    while(tend1)
2461    {
2462        *tend1=0;
2463        xn=xbase.getChildNode(path);
2464        if (xn.isEmpty())
2465        {
2466            if (createIfMissing) xn=xbase.addChild(path);
2467            else { *tend1=sep; return XMLNode::emptyXMLNode; }
2468        }
2469        *tend1=sep;
2470        xbase=xn;
2471        path=tend1+1;
2472        tend1=xstrstr(path,sepString);
2473    }
2474    xn=xbase.getChildNode(path);
2475    if (xn.isEmpty()&&createIfMissing) xn=xbase.addChild(path);
2476    return xn;
2477}
2478
2479XMLElementPosition XMLNode::positionOfText     (int i) const { if (i>=d->nText ) i=d->nText-1;  return findPosition(d,i,eNodeText ); }
2480XMLElementPosition XMLNode::positionOfClear    (int i) const { if (i>=d->nClear) i=d->nClear-1; return findPosition(d,i,eNodeClear); }
2481XMLElementPosition XMLNode::positionOfChildNode(int i) const { if (i>=d->nChild) i=d->nChild-1; return findPosition(d,i,eNodeChild); }
2482XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const { return positionOfText (indexText (lpszValue)); }
2483XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { return positionOfClear(indexClear(lpszValue)); }
2484XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { if (a) return positionOfClear(a->lpszValue); return positionOfClear(); }
2485XMLElementPosition XMLNode::positionOfChildNode(XMLNode x)  const
2486{
2487    if ((!d)||(!x.d)) return -1;
2488    XMLNodeData *dd=x.d;
2489    XMLNode *pc=d->pChild;
2490    int i=d->nChild;
2491    while (i--) if (pc[i].d==dd) return findPosition(d,i,eNodeChild);
2492    return -1;
2493}
2494XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const
2495{
2496    if (!name) return positionOfChildNode(count);
2497    int j=0;
2498    do { getChildNode(name,&j); if (j<0) return -1; } while (count--);
2499    return findPosition(d,j-1,eNodeChild);
2500}
2501
2502XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name,XMLCSTR attributeName,XMLCSTR attributeValue, int *k) const
2503{
2504     int i=0,j;
2505     if (k) i=*k;
2506     XMLNode x;
2507     XMLCSTR t;
2508     do
2509     {
2510         x=getChildNode(name,&i);
2511         if (!x.isEmpty())
2512         {
2513             if (attributeValue)
2514             {
2515                 j=0;
2516                 do
2517                 {
2518                     t=x.getAttribute(attributeName,&j);
2519                     if (t&&(xstricmp(attributeValue,t)==0)) { if (k) *k=i; return x; }
2520                 } while (t);
2521             } else
2522             {
2523                 if (x.isAttributeSet(attributeName)) { if (k) *k=i; return x; }
2524             }
2525         }
2526     } while (!x.isEmpty());
2527     return emptyXMLNode;
2528}
2529
2530// Find an attribute on an node.
2531XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const
2532{
2533    if (!d) return NULL;
2534    int i=0,n=d->nAttribute;
2535    if (j) i=*j;
2536    XMLAttribute *pAttr=d->pAttribute+i;
2537    for (; i<n; i++)
2538    {
2539        if (xstricmp(pAttr->lpszName, lpszAttrib)==0)
2540        {
2541            if (j) *j=i+1;
2542            return pAttr->lpszValue;
2543        }
2544        pAttr++;
2545    }
2546    return NULL;
2547}
2548
2549char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const
2550{
2551    if (!d) return FALSE;
2552    int i,n=d->nAttribute;
2553    XMLAttribute *pAttr=d->pAttribute;
2554    for (i=0; i<n; i++)
2555    {
2556        if (xstricmp(pAttr->lpszName, lpszAttrib)==0)
2557        {
2558            return TRUE;
2559        }
2560        pAttr++;
2561    }
2562    return FALSE;
2563}
2564
2565XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const
2566{
2567    if (!d) return NULL;
2568    int i=0;
2569    while (j-->0) getAttribute(name,&i);
2570    return getAttribute(name,&i);
2571}
2572
2573XMLNodeContents XMLNode::enumContents(int i) const
2574{
2575    XMLNodeContents c;
2576    if (!d) { c.etype=eNodeNULL; return c; }
2577    if (i<d->nAttribute)
2578    {
2579        c.etype=eNodeAttribute;
2580        c.attrib=d->pAttribute[i];
2581        return c;
2582    }
2583    i-=d->nAttribute;
2584    c.etype=(XMLElementType)(d->pOrder[i]&3);
2585    i=(d->pOrder[i])>>2;
2586    switch (c.etype)
2587    {
2588    case eNodeChild:     c.child = d->pChild[i];      break;
2589    case eNodeText:      c.text  = d->pText[i];       break;
2590    case eNodeClear:     c.clear = d->pClear[i];      break;
2591    default: break;
2592    }
2593    return c;
2594}
2595
2596XMLCSTR XMLNode::getName() const { if (!d) return NULL; return d->lpszName;   }
2597int XMLNode::nText()       const { if (!d) return 0;    return d->nText;      }
2598int XMLNode::nChildNode()  const { if (!d) return 0;    return d->nChild;     }
2599int XMLNode::nAttribute()  const { if (!d) return 0;    return d->nAttribute; }
2600int XMLNode::nClear()      const { if (!d) return 0;    return d->nClear;     }
2601int XMLNode::nElement()    const { if (!d) return 0;    return d->nAttribute+d->nChild+d->nText+d->nClear; }
2602XMLClear     XMLNode::getClear         (int i) const { if ((!d)||(i>=d->nClear    )) return emptyXMLClear;     return d->pClear[i];     }
2603XMLAttribute XMLNode::getAttribute     (int i) const { if ((!d)||(i>=d->nAttribute)) return emptyXMLAttribute; return d->pAttribute[i]; }
2604XMLCSTR      XMLNode::getAttributeName (int i) const { if ((!d)||(i>=d->nAttribute)) return NULL;              return d->pAttribute[i].lpszName;  }
2605XMLCSTR      XMLNode::getAttributeValue(int i) const { if ((!d)||(i>=d->nAttribute)) return NULL;              return d->pAttribute[i].lpszValue; }
2606XMLCSTR      XMLNode::getText          (int i) const { if ((!d)||(i>=d->nText     )) return NULL;              return d->pText[i];      }
2607XMLNode      XMLNode::getChildNode     (int i) const { if ((!d)||(i>=d->nChild    )) return emptyXMLNode;      return d->pChild[i];     }
2608XMLNode      XMLNode::getParentNode    (     ) const { if ((!d)||(!d->pParent     )) return emptyXMLNode;      return XMLNode(d->pParent); }
2609char         XMLNode::isDeclaration    (     ) const { if (!d) return 0;             return d->isDeclaration; }
2610char         XMLNode::isEmpty          (     ) const { return (d==NULL); }
2611XMLNode       XMLNode::emptyNode       (     )       { return XMLNode::emptyXMLNode; }
2612
2613XMLNode       XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, XMLElementPosition pos)
2614              { return addChild_priv(0,stringDup(lpszName),isDeclaration,pos); }
2615XMLNode       XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, XMLElementPosition pos)
2616              { return addChild_priv(0,lpszName,isDeclaration,pos); }
2617XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue)
2618              { return addAttribute_priv(0,stringDup(lpszName),stringDup(lpszValue)); }
2619XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev)
2620              { return addAttribute_priv(0,lpszName,lpszValuev); }
2621XMLCSTR       XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos)
2622              { return addText_priv(0,stringDup(lpszValue),pos); }
2623XMLCSTR       XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos)
2624              { return addText_priv(0,lpszValue,pos); }
2625XMLClear     *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos)
2626              { return addClear_priv(0,stringDup(lpszValue),lpszOpen,lpszClose,pos); }
2627XMLClear     *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos)
2628              { return addClear_priv(0,lpszValue,lpszOpen,lpszClose,pos); }
2629XMLCSTR       XMLNode::updateName(XMLCSTR lpszName)
2630              { return updateName_WOSD(stringDup(lpszName)); }
2631XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute)
2632              { return updateAttribute_WOSD(stringDup(newAttribute->lpszValue),stringDup(newAttribute->lpszName),oldAttribute->lpszName); }
2633XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,int i)
2634              { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),i); }
2635XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName)
2636              { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),lpszOldName); }
2637XMLCSTR       XMLNode::updateText(XMLCSTR lpszNewValue, int i)
2638              { return updateText_WOSD(stringDup(lpszNewValue),i); }
2639XMLCSTR       XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue)
2640              { return updateText_WOSD(stringDup(lpszNewValue),lpszOldValue); }
2641XMLClear     *XMLNode::updateClear(XMLCSTR lpszNewContent, int i)
2642              { return updateClear_WOSD(stringDup(lpszNewContent),i); }
2643XMLClear     *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue)
2644              { return updateClear_WOSD(stringDup(lpszNewValue),lpszOldValue); }
2645XMLClear     *XMLNode::updateClear(XMLClear *newP,XMLClear *oldP)
2646              { return updateClear_WOSD(stringDup(newP->lpszValue),oldP->lpszValue); }
2647
2648char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, char _guessWideCharChars,
2649                               char _dropWhiteSpace, char _removeCommentsInMiddleOfText)
2650{
2651    guessWideCharChars=_guessWideCharChars; dropWhiteSpace=_dropWhiteSpace; removeCommentsInMiddleOfText=_removeCommentsInMiddleOfText;
2652#ifdef _XMLWIDECHAR
2653    if (_characterEncoding) characterEncoding=_characterEncoding;
2654#else
2655    switch(_characterEncoding)
2656    {
2657    case char_encoding_UTF8:     characterEncoding=_characterEncoding; XML_ByteTable=XML_utf8ByteTable; break;
2658    case char_encoding_legacy:   characterEncoding=_characterEncoding; XML_ByteTable=XML_legacyByteTable; break;
2659    case char_encoding_ShiftJIS: characterEncoding=_characterEncoding; XML_ByteTable=XML_sjisByteTable; break;
2660    case char_encoding_GB2312:   characterEncoding=_characterEncoding; XML_ByteTable=XML_gb2312ByteTable; break;
2661    case char_encoding_Big5:
2662    case char_encoding_GBK:      characterEncoding=_characterEncoding; XML_ByteTable=XML_gbk_big5_ByteTable; break;
2663    default: return 1;
2664    }
2665#endif
2666    return 0;
2667}
2668
2669XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf,int l, char useXMLEncodingAttribute)
2670{
2671#ifdef _XMLWIDECHAR
2672    return (XMLCharEncoding)0;
2673#else
2674    if (l<25) return (XMLCharEncoding)0;
2675    if (guessWideCharChars&&(myIsTextWideChar(buf,l))) return (XMLCharEncoding)0;
2676    unsigned char *b=(unsigned char*)buf;
2677    if ((b[0]==0xef)&&(b[1]==0xbb)&&(b[2]==0xbf)) return char_encoding_UTF8;
2678
2679    // Match utf-8 model ?
2680    XMLCharEncoding bestGuess=char_encoding_UTF8;
2681    int i=0;
2682    while (i<l)
2683        switch (XML_utf8ByteTable[b[i]])
2684        {
2685        case 4: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ?
2686        case 3: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ?
2687        case 2: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ?
2688        case 1: i++; break;
2689        case 0: i=l;
2690        }
2691    if (!useXMLEncodingAttribute) return bestGuess;
2692    // if encoding is specified and different from utf-8 than it's non-utf8
2693    // otherwise it's utf-8
2694    char bb[201];
2695    l=mmin(l,200);
2696    memcpy(bb,buf,l); // copy buf into bb to be able to do "bb[l]=0"
2697    bb[l]=0;
2698    b=(unsigned char*)strstr(bb,"encoding");
2699    if (!b) return bestGuess;
2700    b+=8; while XML_isSPACECHAR(*b) b++; if (*b!='=') return bestGuess;
2701    b++;  while XML_isSPACECHAR(*b) b++; if ((*b!='\'')&&(*b!='"')) return bestGuess;
2702    b++;  while XML_isSPACECHAR(*b) b++;
2703
2704    if ((xstrnicmp((char*)b,"utf-8",5)==0)||
2705        (xstrnicmp((char*)b,"utf8",4)==0))
2706    {
2707        if (bestGuess==char_encoding_legacy) return char_encoding_error;
2708        return char_encoding_UTF8;
2709    }
2710
2711    if ((xstrnicmp((char*)b,"shiftjis",8)==0)||
2712        (xstrnicmp((char*)b,"shift-jis",9)==0)||
2713        (xstrnicmp((char*)b,"sjis",4)==0)) return char_encoding_ShiftJIS;
2714
2715    if (xstrnicmp((char*)b,"GB2312",6)==0) return char_encoding_GB2312;
2716    if (xstrnicmp((char*)b,"Big5",4)==0) return char_encoding_Big5;
2717    if (xstrnicmp((char*)b,"GBK",3)==0) return char_encoding_GBK;
2718
2719    return char_encoding_legacy;
2720#endif
2721}
2722#undef XML_isSPACECHAR
2723
2724//////////////////////////////////////////////////////////
2725//      Here starts the base64 conversion functions.    //
2726//////////////////////////////////////////////////////////
2727
// Padding character written at the end of the encoded text when the last
// group holds fewer than three input bytes.
static const char base64Fillchar = _CXML('='); // used to mark partial words at the end

// This lookup table defines the base64 encoding: entry k is the character
// that represents the 6-bit value k.
XMLCSTR base64EncodeTable=_CXML("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");

// The decode table is indexed by character code (0-255) and gives the 6-bit
// value (0-63) of any valid base64 character. Special entries:
// 96: '='  -   97: space char   -   98: illegal char   -   99: end of string
const unsigned char base64DecodeTable[] = {
    99,98,98,98,98,98,98,98,98,97,  97,98,98,97,98,98,98,98,98,98,  98,98,98,98,98,98,98,98,98,98,  //00 -29
    98,98,97,98,98,98,98,98,98,98,  98,98,98,62,98,98,98,63,52,53,  54,55,56,57,58,59,60,61,98,98,  //30 -59
    98,96,98,98,98, 0, 1, 2, 3, 4,   5, 6, 7, 8, 9,10,11,12,13,14,  15,16,17,18,19,20,21,22,23,24,  //60 -89
    25,98,98,98,98,98,98,26,27,28,  29,30,31,32,33,34,35,36,37,38,  39,40,41,42,43,44,45,46,47,48,  //90 -119
    49,50,51,98,98,98,98,98,98,98,  98,98,98,98,98,98,98,98,98,98,  98,98,98,98,98,98,98,98,98,98,  //120 -149
    98,98,98,98,98,98,98,98,98,98,  98,98,98,98,98,98,98,98,98,98,  98,98,98,98,98,98,98,98,98,98,  //150 -179
    98,98,98,98,98,98,98,98,98,98,  98,98,98,98,98,98,98,98,98,98,  98,98,98,98,98,98,98,98,98,98,  //180 -209
    98,98,98,98,98,98,98,98,98,98,  98,98,98,98,98,98,98,98,98,98,  98,98,98,98,98,98,98,98,98,98,  //210 -239
    98,98,98,98,98,98,98,98,98,98,  98,98,98,98,98,98                                               //240 -255
};
2746
2747XMLParserBase64Tool::~XMLParserBase64Tool(){ freeBuffer(); }
2748
2749void XMLParserBase64Tool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; }
2750
2751int XMLParserBase64Tool::encodeLength(int inlen, char formatted)
2752{
2753    unsigned int i=((inlen-1)/3*4+4+1);
2754    if (formatted) i+=inlen/54;
2755    return i;
2756}
2757
2758XMLSTR XMLParserBase64Tool::encode(unsigned char *inbuf, unsigned int inlen, char formatted)
2759{
2760    int i=encodeLength(inlen,formatted),k=17,eLen=inlen/3,j;
2761    alloc(i*sizeof(XMLCHAR));
2762    XMLSTR curr=(XMLSTR)buf;
2763    for(i=0;i<eLen;i++)
2764    {
2765        // Copy next three bytes into lower 24 bits of int, paying attention to sign.
2766        j=(inbuf[0]<<16)|(inbuf[1]<<8)|inbuf[2]; inbuf+=3;
2767        // Encode the int into four chars
2768        *(curr++)=base64EncodeTable[ j>>18      ];
2769        *(curr++)=base64EncodeTable[(j>>12)&0x3f];
2770        *(curr++)=base64EncodeTable[(j>> 6)&0x3f];
2771        *(curr++)=base64EncodeTable[(j    )&0x3f];
2772        if (formatted) { if (!k) { *(curr++)=_CXML('\n'); k=18; } k--; }
2773    }
2774    eLen=inlen-eLen*3; // 0 - 2.
2775    if (eLen==1)
2776    {
2777        *(curr++)=base64EncodeTable[ inbuf[0]>>2      ];
2778        *(curr++)=base64EncodeTable[(inbuf[0]<<4)&0x3F];
2779        *(curr++)=base64Fillchar;
2780        *(curr++)=base64Fillchar;
2781    } else if (eLen==2)
2782    {
2783        j=(inbuf[0]<<8)|inbuf[1];
2784        *(curr++)=base64EncodeTable[ j>>10      ];
2785        *(curr++)=base64EncodeTable[(j>> 4)&0x3f];
2786        *(curr++)=base64EncodeTable[(j<< 2)&0x3f];
2787        *(curr++)=base64Fillchar;
2788    }
2789    *(curr++)=0;
2790    return (XMLSTR)buf;
2791}
2792
2793unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data,XMLError *xe)
2794{
2795     if (xe) *xe=eXMLErrorNone;
2796    int size=0;
2797    unsigned char c;
2798    //skip any extra characters (e.g. newlines or spaces)
2799    while (*data)
2800    {
2801#ifdef _XMLWIDECHAR
2802        if (*data>255) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
2803#endif
2804        c=base64DecodeTable[(unsigned char)(*data)];
2805        if (c<97) size++;
2806        else if (c==98) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
2807        data++;
2808    }
2809    if (xe&&(size%4!=0)) *xe=eXMLErrorBase64DataSizeIsNotMultipleOf4;
2810    if (size==0) return 0;
2811    do { data--; size--; } while(*data==base64Fillchar); size++;
2812    return (unsigned int)((size*3)/4);
2813}
2814
// Decode the base64 string 'data' into 'buf', which can hold 'len' bytes.
// Whitespace characters in the input are skipped.
// Return value and *xe (when 'xe' is non-NULL):
//   2 - the end of the string was reached on a group boundary, or a fill
//       character was met once exactly 'len' bytes had been written;
//       *xe stays eXMLErrorNone.
//   1 - the input stopped or was padded in the middle of a group before
//       'len' bytes were produced; *xe=eXMLErrorBase64DecodeTruncatedData.
//   0 - an illegal character was found
//       (*xe=eXMLErrorBase64DecodeIllegalCharacter) or more data remained
//       after 'len' bytes (*xe=eXMLErrorBase64DecodeBufferTooSmall).
unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int len, XMLError *xe)
{
    if (xe) *xe=eXMLErrorNone;
    // i: read position in 'data'; p: write position in 'buf'.
    int i=0,p=0;
    // c and d alternately hold the decode-table value of the symbol just read.
    unsigned char d,c;
    for(;;)
    {

// BASE64DECODE_READ_NEXT_CHAR(c) stores the decode-table value of the next
// non-space input character in 'c' and returns 0 from decode() on an illegal
// character. The wide-char variant also treats code points above 255 as
// illegal.
#ifdef _XMLWIDECHAR
#define BASE64DECODE_READ_NEXT_CHAR(c)                                              \
        do {                                                                        \
            if (data[i]>255){ c=98; break; }                                        \
            c=base64DecodeTable[(unsigned char)data[i++]];                       \
        }while (c==97);                                                             \
        if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
#else
#define BASE64DECODE_READ_NEXT_CHAR(c)                                           \
        do { c=base64DecodeTable[(unsigned char)data[i++]]; }while (c==97);   \
        if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
#endif

        // First symbol of a group: end of string here is a clean finish.
        BASE64DECODE_READ_NEXT_CHAR(c)
        if (c==99) { return 2; }
        if (c==96)
        {
            if (p==(int)len) return 2;
            if (xe) *xe=eXMLErrorBase64DecodeTruncatedData;
            return 1;
        }

        // Second symbol: completes the first output byte.
        BASE64DECODE_READ_NEXT_CHAR(d)
        if ((d==99)||(d==96)) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData;  return 1; }
        if (p==(int)len) {      if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; return 0; }
        buf[p++]=(unsigned char)((c<<2)|((d>>4)&0x3));

        // Third symbol: completes the second output byte.
        BASE64DECODE_READ_NEXT_CHAR(c)
        if (c==99) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData;  return 1; }
        if (p==(int)len)
        {
            if (c==96) return 2;
            if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall;
            return 0;
        }
        if (c==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData;  return 1; }
        buf[p++]=(unsigned char)(((d<<4)&0xf0)|((c>>2)&0xf));

        // Fourth symbol: completes the third output byte.
        BASE64DECODE_READ_NEXT_CHAR(d)
        if (d==99 ) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData;  return 1; }
        if (p==(int)len)
        {
            if (d==96) return 2;
            if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall;
            return 0;
        }
        if (d==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData;  return 1; }
        buf[p++]=(unsigned char)(((c<<6)&0xc0)|d);
    }
}
2873#undef BASE64DECODE_READ_NEXT_CHAR
2874
2875void XMLParserBase64Tool::alloc(int newsize)
2876{
2877    if ((!buf)&&(newsize)) { buf=malloc(newsize); buflen=newsize; return; }
2878    if (newsize>buflen) { buf=realloc(buf,newsize); buflen=newsize; }
2879}
2880
2881unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe)
2882{
2883    if (xe) *xe=eXMLErrorNone;
2884    unsigned int len=decodeSize(data,xe);
2885    if (outlen) *outlen=len;
2886    if (!len) return NULL;
2887    alloc(len+1);
2888    if(!decode(data,(unsigned char*)buf,len,xe)){ return NULL; }
2889    return (unsigned char*)buf;
2890}
2891
2892