/*
 * Summary: the core parser module
 * Description: Interfaces, constants and types related to the XML parser
 *
 * Copy: See Copyright for the status of this software.
 *
 * Author: Daniel Veillard
 */
9
10#ifndef __XML_PARSER_H__
11#define __XML_PARSER_H__
12
13#include <stdarg.h>
14
15#include <libxml/xmlversion.h>
16#include <libxml/tree.h>
17#include <libxml/dict.h>
18#include <libxml/hash.h>
19#include <libxml/valid.h>
20#include <libxml/entities.h>
21#include <libxml/xmlerror.h>
22#include <libxml/xmlstring.h>
23
24#ifdef __cplusplus
25extern "C" {
26#endif
27
/**
 * XML_DEFAULT_VERSION:
 *
 * The default version of XML used: 1.0
 */
#define XML_DEFAULT_VERSION "1.0"
34
/**
 * xmlParserInput:
 *
 * An xmlParserInput is an input flow for the XML processor.
 * Each entity parsed is associated with an xmlParserInput (except the
 * few predefined ones). This is the case both for internal entities
 * - in which case the flow is already completely in memory - and for
 * external entities - in which case we use the buf structure for
 * progressive reading and I18N conversions to the internal UTF-8 format.
 */
45
46/**
47 * xmlParserInputDeallocate:
48 * @str: the string to deallocate
49 *
50 * Callback for freeing some parser input allocations.
51 */
52typedef void (* xmlParserInputDeallocate)(xmlChar *str);
53
54struct _xmlParserInput {
55 /* Input buffer */
56 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
57
58 const char *filename; /* The file analyzed, if any */
59 const char *directory; /* the directory/base of the file */
60 const xmlChar *base; /* Base of the array to parse */
61 const xmlChar *cur; /* Current char being parsed */
62 const xmlChar *end; /* end of the array to parse */
63 int length; /* length if known */
64 int line; /* Current line */
65 int col; /* Current column */
66 /*
67 * NOTE: consumed is only tested for equality in the parser code,
68 * so even if there is an overflow this should not give troubles
69 * for parsing very large instances.
70 */
71 unsigned long consumed; /* How many xmlChars already consumed */
72 xmlParserInputDeallocate free; /* function to deallocate the base */
73 const xmlChar *encoding; /* the encoding string for entity */
74 const xmlChar *version; /* the version string for entity */
75 int standalone; /* Was that entity marked standalone */
76 int id; /* an unique identifier for the entity */
77};
78
/**
 * xmlParserNodeInfo:
 *
 * The parser can be asked to collect Node information, i.e. at what
 * place in the file they were detected.
 * NOTE: This is off by default and not very well tested.
 */
typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;

struct _xmlParserNodeInfo {
    const struct _xmlNode *node;
    /* Position & line # where the text that created the node begins & ends */
    unsigned long begin_pos;
    unsigned long begin_line;
    unsigned long end_pos;
    unsigned long end_line;
};
97
98typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
99typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
100struct _xmlParserNodeInfoSeq {
101 unsigned long maximum;
102 unsigned long length;
103 xmlParserNodeInfo* buffer;
104};
105
/**
 * xmlParserInputState:
 *
 * The parser now also works as a state based parser.
 * The recursive one uses the state info for entities processing.
 *
 * Only XML_PARSER_EOF and XML_PARSER_START carry explicit values; the
 * remaining enumerators follow sequentially, so their order must be kept.
 */
typedef enum {
    XML_PARSER_EOF = -1,        /* nothing is to be parsed */
    XML_PARSER_START = 0,       /* nothing has been parsed */
    XML_PARSER_MISC,            /* Misc* before int subset */
    XML_PARSER_PI,              /* Within a processing instruction */
    XML_PARSER_DTD,             /* within some DTD content */
    XML_PARSER_PROLOG,          /* Misc* after internal subset */
    XML_PARSER_COMMENT,         /* within a comment */
    XML_PARSER_START_TAG,       /* within a start tag */
    XML_PARSER_CONTENT,         /* within the content */
    XML_PARSER_CDATA_SECTION,   /* within a CDATA section */
    XML_PARSER_END_TAG,         /* within a closing tag */
    XML_PARSER_ENTITY_DECL,     /* within an entity declaration */
    XML_PARSER_ENTITY_VALUE,    /* within an entity value in a decl */
    XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
    XML_PARSER_SYSTEM_LITERAL,  /* within a SYSTEM value */
    XML_PARSER_EPILOG,          /* the Misc* after the last end tag */
    XML_PARSER_IGNORE,          /* within an IGNORED section */
    XML_PARSER_PUBLIC_LITERAL   /* within a PUBLIC value */
} xmlParserInputState;
132
/**
 * XML_DETECT_IDS:
 *
 * Bit in the loadsubset context field to tell to do ID/REFs lookups.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_DETECT_IDS 2
140
/**
 * XML_COMPLETE_ATTRS:
 *
 * Bit in the loadsubset context field to tell to complete the
 * elements attributes lists with the ones defaulted from the DTDs.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_COMPLETE_ATTRS 4
149
/**
 * XML_SKIP_IDS:
 *
 * Bit in the loadsubset context field to tell not to do ID/REFs registration.
 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
 */
#define XML_SKIP_IDS 8
157
/**
 * xmlParserMode:
 *
 * A parser can operate in various modes.
 */
typedef enum {
    XML_PARSE_UNKNOWN = 0,
    XML_PARSE_DOM = 1,
    XML_PARSE_SAX = 2,
    XML_PARSE_PUSH_DOM = 3,
    XML_PARSE_PUSH_SAX = 4,
    XML_PARSE_READER = 5
} xmlParserMode;
171
/* Forward declaration: the structure body is not defined in this header. */
typedef struct _xmlStartTag xmlStartTag;
173
174/**
175 * xmlParserCtxt:
176 *
177 * The parser context.
178 * NOTE This doesn't completely define the parser state, the (current ?)
179 * design of the parser uses recursive function calls since this allow
180 * and easy mapping from the production rules of the specification
181 * to the actual code. The drawback is that the actual function call
182 * also reflect the parser state. However most of the parsing routines
183 * takes as the only argument the parser context pointer, so migrating
184 * to a state based parser for progressive parsing shouldn't be too hard.
185 */
186struct _xmlParserCtxt {
187 struct _xmlSAXHandler *sax; /* The SAX handler */
188 void *userData; /* For SAX interface only, used by DOM build */
189 xmlDocPtr myDoc; /* the document being built */
190 int wellFormed; /* is the document well formed */
191 int replaceEntities; /* shall we replace entities ? */
192 const xmlChar *version; /* the XML version string */
193 const xmlChar *encoding; /* the declared encoding, if any */
194 int standalone; /* standalone document */
195 int html; /* an HTML(1)/Docbook(2) document
196 * 3 is HTML after <head>
197 * 10 is HTML after <body>
198 */
199
200 /* Input stream stack */
201 xmlParserInputPtr input; /* Current input stream */
202 int inputNr; /* Number of current input streams */
203 int inputMax; /* Max number of input streams */
204 xmlParserInputPtr *inputTab; /* stack of inputs */
205
206 /* Node analysis stack only used for DOM building */
207 xmlNodePtr node; /* Current parsed Node */
208 int nodeNr; /* Depth of the parsing stack */
209 int nodeMax; /* Max depth of the parsing stack */
210 xmlNodePtr *nodeTab; /* array of nodes */
211
212 int record_info; /* Whether node info should be kept */
213 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
214
215 int errNo; /* error code */
216
217 int hasExternalSubset; /* reference and external subset */
218 int hasPErefs; /* the internal subset has PE refs */
219 int external; /* are we parsing an external entity */
220
221 int valid; /* is the document valid */
222 int validate; /* shall we try to validate ? */
223 xmlValidCtxt vctxt; /* The validity context */
224
225 xmlParserInputState instate; /* current type of input */
226 int token; /* next char look-ahead */
227
228 char *directory; /* the data directory */
229
230 /* Node name stack */
231 const xmlChar *name; /* Current parsed Node */
232 int nameNr; /* Depth of the parsing stack */
233 int nameMax; /* Max depth of the parsing stack */
234 const xmlChar * *nameTab; /* array of nodes */
235
236 long nbChars; /* unused */
237 long checkIndex; /* used by progressive parsing lookup */
238 int keepBlanks; /* ugly but ... */
239 int disableSAX; /* SAX callbacks are disabled */
240 int inSubset; /* Parsing is in int 1/ext 2 subset */
241 const xmlChar * intSubName; /* name of subset */
242 xmlChar * extSubURI; /* URI of external subset */
243 xmlChar * extSubSystem; /* SYSTEM ID of external subset */
244
245 /* xml:space values */
246 int * space; /* Should the parser preserve spaces */
247 int spaceNr; /* Depth of the parsing stack */
248 int spaceMax; /* Max depth of the parsing stack */
249 int * spaceTab; /* array of space infos */
250
251 int depth; /* to prevent entity substitution loops */
252 xmlParserInputPtr entity; /* used to check entities boundaries */
253 int charset; /* encoding of the in-memory content
254 actually an xmlCharEncoding */
255 int nodelen; /* Those two fields are there to */
256 int nodemem; /* Speed up large node parsing */
257 int pedantic; /* signal pedantic warnings */
258 void *_private; /* For user data, libxml won't touch it */
259
260 int loadsubset; /* should the external subset be loaded */
261 int linenumbers; /* set line number in element content */
262 void *catalogs; /* document's own catalog */
263 int recovery; /* run in recovery mode */
264 int progressive; /* is this a progressive parsing */
265 xmlDictPtr dict; /* dictionary for the parser */
266 const xmlChar * *atts; /* array for the attributes callbacks */
267 int maxatts; /* the size of the array */
268 int docdict; /* use strings from dict to build tree */
269
270 /*
271 * pre-interned strings
272 */
273 const xmlChar *str_xml;
274 const xmlChar *str_xmlns;
275 const xmlChar *str_xml_ns;
276
277 /*
278 * Everything below is used only by the new SAX mode
279 */
280 int sax2; /* operating in the new SAX mode */
281 int nsNr; /* the number of inherited namespaces */
282 int nsMax; /* the size of the arrays */
283 const xmlChar * *nsTab; /* the array of prefix/namespace name */
284 int *attallocs; /* which attribute were allocated */
285 xmlStartTag *pushTab; /* array of data for push */
286 xmlHashTablePtr attsDefault; /* defaulted attributes if any */
287 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
288 int nsWellFormed; /* is the document XML Namespace okay */
289 int options; /* Extra options */
290
291 /*
292 * Those fields are needed only for streaming parsing so far
293 */
294 int dictNames; /* Use dictionary names for the tree */
295 int freeElemsNr; /* number of freed element nodes */
296 xmlNodePtr freeElems; /* List of freed element nodes */
297 int freeAttrsNr; /* number of freed attributes nodes */
298 xmlAttrPtr freeAttrs; /* List of freed attributes nodes */
299
300 /*
301 * the complete error information for the last error.
302 */
303 xmlError lastError;
304 xmlParserMode parseMode; /* the parser mode */
305 unsigned long nbentities; /* number of entities references */
306 unsigned long sizeentities; /* size of parsed entities */
307
308 /* for use by HTML non-recursive parser */
309 xmlParserNodeInfo *nodeInfo; /* Current NodeInfo */
310 int nodeInfoNr; /* Depth of the parsing stack */
311 int nodeInfoMax; /* Max depth of the parsing stack */
312 xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */
313
314 int input_id; /* we need to label inputs */
315 unsigned long sizeentcopy; /* volume of entity copy */
316};
317
318/**
319 * xmlSAXLocator:
320 *
321 * A SAX Locator.
322 */
323struct _xmlSAXLocator {
324 const xmlChar *(*getPublicId)(void *ctx);
325 const xmlChar *(*getSystemId)(void *ctx);
326 int (*getLineNumber)(void *ctx);
327 int (*getColumnNumber)(void *ctx);
328};
329
/**
 * xmlSAXHandler:
 *
 * A SAX handler is a bunch of callbacks called by the parser when the
 * processing of the input generates data or structure information.
 */
336
337/**
338 * resolveEntitySAXFunc:
339 * @ctx: the user data (XML parser context)
340 * @publicId: The public ID of the entity
341 * @systemId: The system ID of the entity
342 *
343 * Callback:
344 * The entity loader, to control the loading of external entities,
345 * the application can either:
346 * - override this resolveEntity() callback in the SAX block
347 * - or better use the xmlSetExternalEntityLoader() function to
348 * set up it's own entity resolution routine
349 *
350 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
351 */
352typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
353 const xmlChar *publicId,
354 const xmlChar *systemId);
355/**
356 * internalSubsetSAXFunc:
357 * @ctx: the user data (XML parser context)
358 * @name: the root element name
359 * @ExternalID: the external ID
360 * @SystemID: the SYSTEM ID (e.g. filename or URL)
361 *
362 * Callback on internal subset declaration.
363 */
364typedef void (*internalSubsetSAXFunc) (void *ctx,
365 const xmlChar *name,
366 const xmlChar *ExternalID,
367 const xmlChar *SystemID);
368/**
369 * externalSubsetSAXFunc:
370 * @ctx: the user data (XML parser context)
371 * @name: the root element name
372 * @ExternalID: the external ID
373 * @SystemID: the SYSTEM ID (e.g. filename or URL)
374 *
375 * Callback on external subset declaration.
376 */
377typedef void (*externalSubsetSAXFunc) (void *ctx,
378 const xmlChar *name,
379 const xmlChar *ExternalID,
380 const xmlChar *SystemID);
381/**
382 * getEntitySAXFunc:
383 * @ctx: the user data (XML parser context)
384 * @name: The entity name
385 *
386 * Get an entity by name.
387 *
388 * Returns the xmlEntityPtr if found.
389 */
390typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
391 const xmlChar *name);
392/**
393 * getParameterEntitySAXFunc:
394 * @ctx: the user data (XML parser context)
395 * @name: The entity name
396 *
397 * Get a parameter entity by name.
398 *
399 * Returns the xmlEntityPtr if found.
400 */
401typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
402 const xmlChar *name);
403/**
404 * entityDeclSAXFunc:
405 * @ctx: the user data (XML parser context)
406 * @name: the entity name
407 * @type: the entity type
408 * @publicId: The public ID of the entity
409 * @systemId: The system ID of the entity
410 * @content: the entity value (without processing).
411 *
412 * An entity definition has been parsed.
413 */
414typedef void (*entityDeclSAXFunc) (void *ctx,
415 const xmlChar *name,
416 int type,
417 const xmlChar *publicId,
418 const xmlChar *systemId,
419 xmlChar *content);
420/**
421 * notationDeclSAXFunc:
422 * @ctx: the user data (XML parser context)
423 * @name: The name of the notation
424 * @publicId: The public ID of the entity
425 * @systemId: The system ID of the entity
426 *
427 * What to do when a notation declaration has been parsed.
428 */
429typedef void (*notationDeclSAXFunc)(void *ctx,
430 const xmlChar *name,
431 const xmlChar *publicId,
432 const xmlChar *systemId);
433/**
434 * attributeDeclSAXFunc:
435 * @ctx: the user data (XML parser context)
436 * @elem: the name of the element
437 * @fullname: the attribute name
438 * @type: the attribute type
439 * @def: the type of default value
440 * @defaultValue: the attribute default value
441 * @tree: the tree of enumerated value set
442 *
443 * An attribute definition has been parsed.
444 */
445typedef void (*attributeDeclSAXFunc)(void *ctx,
446 const xmlChar *elem,
447 const xmlChar *fullname,
448 int type,
449 int def,
450 const xmlChar *defaultValue,
451 xmlEnumerationPtr tree);
452/**
453 * elementDeclSAXFunc:
454 * @ctx: the user data (XML parser context)
455 * @name: the element name
456 * @type: the element type
457 * @content: the element value tree
458 *
459 * An element definition has been parsed.
460 */
461typedef void (*elementDeclSAXFunc)(void *ctx,
462 const xmlChar *name,
463 int type,
464 xmlElementContentPtr content);
465/**
466 * unparsedEntityDeclSAXFunc:
467 * @ctx: the user data (XML parser context)
468 * @name: The name of the entity
469 * @publicId: The public ID of the entity
470 * @systemId: The system ID of the entity
471 * @notationName: the name of the notation
472 *
473 * What to do when an unparsed entity declaration is parsed.
474 */
475typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
476 const xmlChar *name,
477 const xmlChar *publicId,
478 const xmlChar *systemId,
479 const xmlChar *notationName);
480/**
481 * setDocumentLocatorSAXFunc:
482 * @ctx: the user data (XML parser context)
483 * @loc: A SAX Locator
484 *
485 * Receive the document locator at startup, actually xmlDefaultSAXLocator.
486 * Everything is available on the context, so this is useless in our case.
487 */
488typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
489 xmlSAXLocatorPtr loc);
/**
 * startDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document starts being processed.
 */
typedef void (*startDocumentSAXFunc) (void *ctx);
/**
 * endDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document end has been detected.
 */
typedef void (*endDocumentSAXFunc) (void *ctx);
504/**
505 * startElementSAXFunc:
506 * @ctx: the user data (XML parser context)
507 * @name: The element name, including namespace prefix
508 * @atts: An array of name/value attributes pairs, NULL terminated
509 *
510 * Called when an opening tag has been processed.
511 */
512typedef void (*startElementSAXFunc) (void *ctx,
513 const xmlChar *name,
514 const xmlChar **atts);
515/**
516 * endElementSAXFunc:
517 * @ctx: the user data (XML parser context)
518 * @name: The element name
519 *
520 * Called when the end of an element has been detected.
521 */
522typedef void (*endElementSAXFunc) (void *ctx,
523 const xmlChar *name);
524/**
525 * attributeSAXFunc:
526 * @ctx: the user data (XML parser context)
527 * @name: The attribute name, including namespace prefix
528 * @value: The attribute value
529 *
530 * Handle an attribute that has been read by the parser.
531 * The default handling is to convert the attribute into an
532 * DOM subtree and past it in a new xmlAttr element added to
533 * the element.
534 */
535typedef void (*attributeSAXFunc) (void *ctx,
536 const xmlChar *name,
537 const xmlChar *value);
538/**
539 * referenceSAXFunc:
540 * @ctx: the user data (XML parser context)
541 * @name: The entity name
542 *
543 * Called when an entity reference is detected.
544 */
545typedef void (*referenceSAXFunc) (void *ctx,
546 const xmlChar *name);
547/**
548 * charactersSAXFunc:
549 * @ctx: the user data (XML parser context)
550 * @ch: a xmlChar string
551 * @len: the number of xmlChar
552 *
553 * Receiving some chars from the parser.
554 */
555typedef void (*charactersSAXFunc) (void *ctx,
556 const xmlChar *ch,
557 int len);
558/**
559 * ignorableWhitespaceSAXFunc:
560 * @ctx: the user data (XML parser context)
561 * @ch: a xmlChar string
562 * @len: the number of xmlChar
563 *
564 * Receiving some ignorable whitespaces from the parser.
565 * UNUSED: by default the DOM building will use characters.
566 */
567typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
568 const xmlChar *ch,
569 int len);
570/**
571 * processingInstructionSAXFunc:
572 * @ctx: the user data (XML parser context)
573 * @target: the target name
574 * @data: the PI data's
575 *
576 * A processing instruction has been parsed.
577 */
578typedef void (*processingInstructionSAXFunc) (void *ctx,
579 const xmlChar *target,
580 const xmlChar *data);
581/**
582 * commentSAXFunc:
583 * @ctx: the user data (XML parser context)
584 * @value: the comment content
585 *
586 * A comment has been parsed.
587 */
588typedef void (*commentSAXFunc) (void *ctx,
589 const xmlChar *value);
590/**
591 * cdataBlockSAXFunc:
592 * @ctx: the user data (XML parser context)
593 * @value: The pcdata content
594 * @len: the block length
595 *
596 * Called when a pcdata block has been parsed.
597 */
598typedef void (*cdataBlockSAXFunc) (
599 void *ctx,
600 const xmlChar *value,
601 int len);
602/**
603 * warningSAXFunc:
604 * @ctx: an XML parser context
605 * @msg: the message to display/transmit
606 * @...: extra parameters for the message display
607 *
608 * Display and format a warning messages, callback.
609 */
610typedef void (XMLCDECL *warningSAXFunc) (void *ctx,
611 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
612/**
613 * errorSAXFunc:
614 * @ctx: an XML parser context
615 * @msg: the message to display/transmit
616 * @...: extra parameters for the message display
617 *
618 * Display and format an error messages, callback.
619 */
620typedef void (XMLCDECL *errorSAXFunc) (void *ctx,
621 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
622/**
623 * fatalErrorSAXFunc:
624 * @ctx: an XML parser context
625 * @msg: the message to display/transmit
626 * @...: extra parameters for the message display
627 *
628 * Display and format fatal error messages, callback.
629 * Note: so far fatalError() SAX callbacks are not used, error()
630 * get all the callbacks for errors.
631 */
632typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx,
633 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3);
/**
 * isStandaloneSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Is this document tagged standalone?
 *
 * Returns 1 if true
 */
typedef int (*isStandaloneSAXFunc) (void *ctx);
/**
 * hasInternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an internal subset?
 *
 * Returns 1 if true
 */
typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
652
/**
 * hasExternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an external subset?
 *
 * Returns 1 if true
 */
typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
662
/************************************************************************
 *									*
 *			The SAX version 2 API extensions		*
 *									*
 ************************************************************************/
/**
 * XML_SAX2_MAGIC:
 *
 * Special constant found in the initialized field of SAX2 blocks.
 */
#define XML_SAX2_MAGIC 0xDEEDBEAF
674
675/**
676 * startElementNsSAX2Func:
677 * @ctx: the user data (XML parser context)
678 * @localname: the local name of the element
679 * @prefix: the element namespace prefix if available
680 * @URI: the element namespace name if available
681 * @nb_namespaces: number of namespace definitions on that node
682 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions
683 * @nb_attributes: the number of attributes on that node
684 * @nb_defaulted: the number of defaulted attributes. The defaulted
685 * ones are at the end of the array
686 * @attributes: pointer to the array of (localname/prefix/URI/value/end)
687 * attribute values.
688 *
689 * SAX2 callback when an element start has been detected by the parser.
690 * It provides the namespace information for the element, as well as
691 * the new namespace declarations on the element.
692 */
693
694typedef void (*startElementNsSAX2Func) (void *ctx,
695 const xmlChar *localname,
696 const xmlChar *prefix,
697 const xmlChar *URI,
698 int nb_namespaces,
699 const xmlChar **namespaces,
700 int nb_attributes,
701 int nb_defaulted,
702 const xmlChar **attributes);
703
704/**
705 * endElementNsSAX2Func:
706 * @ctx: the user data (XML parser context)
707 * @localname: the local name of the element
708 * @prefix: the element namespace prefix if available
709 * @URI: the element namespace name if available
710 *
711 * SAX2 callback when an element end has been detected by the parser.
712 * It provides the namespace information for the element.
713 */
714
715typedef void (*endElementNsSAX2Func) (void *ctx,
716 const xmlChar *localname,
717 const xmlChar *prefix,
718 const xmlChar *URI);
719
720
721struct _xmlSAXHandler {
722 internalSubsetSAXFunc internalSubset;
723 isStandaloneSAXFunc isStandalone;
724 hasInternalSubsetSAXFunc hasInternalSubset;
725 hasExternalSubsetSAXFunc hasExternalSubset;
726 resolveEntitySAXFunc resolveEntity;
727 getEntitySAXFunc getEntity;
728 entityDeclSAXFunc entityDecl;
729 notationDeclSAXFunc notationDecl;
730 attributeDeclSAXFunc attributeDecl;
731 elementDeclSAXFunc elementDecl;
732 unparsedEntityDeclSAXFunc unparsedEntityDecl;
733 setDocumentLocatorSAXFunc setDocumentLocator;
734 startDocumentSAXFunc startDocument;
735 endDocumentSAXFunc endDocument;
736 startElementSAXFunc startElement;
737 endElementSAXFunc endElement;
738 referenceSAXFunc reference;
739 charactersSAXFunc characters;
740 ignorableWhitespaceSAXFunc ignorableWhitespace;
741 processingInstructionSAXFunc processingInstruction;
742 commentSAXFunc comment;
743 warningSAXFunc warning;
744 errorSAXFunc error;
745 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */
746 getParameterEntitySAXFunc getParameterEntity;
747 cdataBlockSAXFunc cdataBlock;
748 externalSubsetSAXFunc externalSubset;
749 unsigned int initialized;
750 /* The following fields are extensions available only on version 2 */
751 void *_private;
752 startElementNsSAX2Func startElementNs;
753 endElementNsSAX2Func endElementNs;
754 xmlStructuredErrorFunc serror;
755};
756
757/*
758 * SAX Version 1
759 */
760typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1;
761typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr;
762struct _xmlSAXHandlerV1 {
763 internalSubsetSAXFunc internalSubset;
764 isStandaloneSAXFunc isStandalone;
765 hasInternalSubsetSAXFunc hasInternalSubset;
766 hasExternalSubsetSAXFunc hasExternalSubset;
767 resolveEntitySAXFunc resolveEntity;
768 getEntitySAXFunc getEntity;
769 entityDeclSAXFunc entityDecl;
770 notationDeclSAXFunc notationDecl;
771 attributeDeclSAXFunc attributeDecl;
772 elementDeclSAXFunc elementDecl;
773 unparsedEntityDeclSAXFunc unparsedEntityDecl;
774 setDocumentLocatorSAXFunc setDocumentLocator;
775 startDocumentSAXFunc startDocument;
776 endDocumentSAXFunc endDocument;
777 startElementSAXFunc startElement;
778 endElementSAXFunc endElement;
779 referenceSAXFunc reference;
780 charactersSAXFunc characters;
781 ignorableWhitespaceSAXFunc ignorableWhitespace;
782 processingInstructionSAXFunc processingInstruction;
783 commentSAXFunc comment;
784 warningSAXFunc warning;
785 errorSAXFunc error;
786 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */
787 getParameterEntitySAXFunc getParameterEntity;
788 cdataBlockSAXFunc cdataBlock;
789 externalSubsetSAXFunc externalSubset;
790 unsigned int initialized;
791};
792
793
794/**
795 * xmlExternalEntityLoader:
796 * @URL: The System ID of the resource requested
797 * @ID: The Public ID of the resource requested
798 * @context: the XML parser context
799 *
800 * External entity loaders types.
801 *
802 * Returns the entity input parser.
803 */
804typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
805 const char *ID,
806 xmlParserCtxtPtr context);
807
808#ifdef __cplusplus
809}
810#endif
811
812#include <libxml/encoding.h>
813#include <libxml/xmlIO.h>
814#include <libxml/globals.h>
815
816#ifdef __cplusplus
817extern "C" {
818#endif
819
820
821/*
822 * Init/Cleanup
823 */
824XMLPUBFUN void XMLCALL
825 xmlInitParser (void);
826XMLPUBFUN void XMLCALL
827 xmlCleanupParser (void);
828
829/*
830 * Input functions
831 */
832XMLPUBFUN int XMLCALL
833 xmlParserInputRead (xmlParserInputPtr in,
834 int len);
835XMLPUBFUN int XMLCALL
836 xmlParserInputGrow (xmlParserInputPtr in,
837 int len);
838
839/*
840 * Basic parsing Interfaces
841 */
842#ifdef LIBXML_SAX1_ENABLED
843XMLPUBFUN xmlDocPtr XMLCALL
844 xmlParseDoc (const xmlChar *cur);
845XMLPUBFUN xmlDocPtr XMLCALL
846 xmlParseFile (const char *filename);
847XMLPUBFUN xmlDocPtr XMLCALL
848 xmlParseMemory (const char *buffer,
849 int size);
850#endif /* LIBXML_SAX1_ENABLED */
851XMLPUBFUN int XMLCALL
852 xmlSubstituteEntitiesDefault(int val);
853XMLPUBFUN int XMLCALL
854 xmlKeepBlanksDefault (int val);
855XMLPUBFUN void XMLCALL
856 xmlStopParser (xmlParserCtxtPtr ctxt);
857XMLPUBFUN int XMLCALL
858 xmlPedanticParserDefault(int val);
859XMLPUBFUN int XMLCALL
860 xmlLineNumbersDefault (int val);
861
#ifdef LIBXML_SAX1_ENABLED
/*
 * Recovery mode
 */
XMLPUBFUN xmlDocPtr XMLCALL
        xmlRecoverDoc           (const xmlChar *cur);
XMLPUBFUN xmlDocPtr XMLCALL
        xmlRecoverMemory        (const char *buffer,
                                 int size);
XMLPUBFUN xmlDocPtr XMLCALL
        xmlRecoverFile          (const char *filename);
#endif /* LIBXML_SAX1_ENABLED */
874
875/*
876 * Less common routines and SAX interfaces
877 */
878XMLPUBFUN int XMLCALL
879 xmlParseDocument (xmlParserCtxtPtr ctxt);
880XMLPUBFUN int XMLCALL
881 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt);
#ifdef LIBXML_SAX1_ENABLED
/* Entry points taking a caller-supplied SAX handler; SAX1 builds only. */
XMLPUBFUN int XMLCALL
        xmlSAXUserParseFile     (xmlSAXHandlerPtr sax,
                                 void *user_data,
                                 const char *filename);
XMLPUBFUN int XMLCALL
        xmlSAXUserParseMemory   (xmlSAXHandlerPtr sax,
                                 void *user_data,
                                 const char *buffer,
                                 int size);
XMLPUBFUN xmlDocPtr XMLCALL
        xmlSAXParseDoc          (xmlSAXHandlerPtr sax,
                                 const xmlChar *cur,
                                 int recovery);
XMLPUBFUN xmlDocPtr XMLCALL
        xmlSAXParseMemory       (xmlSAXHandlerPtr sax,
                                 const char *buffer,
                                 int size,
                                 int recovery);
XMLPUBFUN xmlDocPtr XMLCALL
        xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,
                                 const char *buffer,
                                 int size,
                                 int recovery,
                                 void *data);
XMLPUBFUN xmlDocPtr XMLCALL
        xmlSAXParseFile         (xmlSAXHandlerPtr sax,
                                 const char *filename,
                                 int recovery);
XMLPUBFUN xmlDocPtr XMLCALL
        xmlSAXParseFileWithData (xmlSAXHandlerPtr sax,
                                 const char *filename,
                                 int recovery,
                                 void *data);
XMLPUBFUN xmlDocPtr XMLCALL
        xmlSAXParseEntity       (xmlSAXHandlerPtr sax,
                                 const char *filename);
XMLPUBFUN xmlDocPtr XMLCALL
        xmlParseEntity          (const char *filename);
#endif /* LIBXML_SAX1_ENABLED */
922
#ifdef LIBXML_VALID_ENABLED
/* DTD parsing entry points; declared only when validation is enabled. */
XMLPUBFUN xmlDtdPtr XMLCALL
        xmlSAXParseDTD          (xmlSAXHandlerPtr sax,
                                 const xmlChar *ExternalID,
                                 const xmlChar *SystemID);
XMLPUBFUN xmlDtdPtr XMLCALL
        xmlParseDTD             (const xmlChar *ExternalID,
                                 const xmlChar *SystemID);
XMLPUBFUN xmlDtdPtr XMLCALL
        xmlIOParseDTD           (xmlSAXHandlerPtr sax,
                                 xmlParserInputBufferPtr input,
                                 xmlCharEncoding enc);
#endif /* LIBXML_VALID_ENABLED */
#ifdef LIBXML_SAX1_ENABLED
XMLPUBFUN int XMLCALL
        xmlParseBalancedChunkMemory(xmlDocPtr doc,
                                 xmlSAXHandlerPtr sax,
                                 void *user_data,
                                 int depth,
                                 const xmlChar *string,
                                 xmlNodePtr *lst);
#endif /* LIBXML_SAX1_ENABLED */
945XMLPUBFUN xmlParserErrors XMLCALL
946 xmlParseInNodeContext (xmlNodePtr node,
947 const char *data,
948 int datalen,
949 int options,
950 xmlNodePtr *lst);
#ifdef LIBXML_SAX1_ENABLED
XMLPUBFUN int XMLCALL
        xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
                                 xmlSAXHandlerPtr sax,
                                 void *user_data,
                                 int depth,
                                 const xmlChar *string,
                                 xmlNodePtr *lst,
                                 int recover);
XMLPUBFUN int XMLCALL
        xmlParseExternalEntity  (xmlDocPtr doc,
                                 xmlSAXHandlerPtr sax,
                                 void *user_data,
                                 int depth,
                                 const xmlChar *URL,
                                 const xmlChar *ID,
                                 xmlNodePtr *lst);
#endif /* LIBXML_SAX1_ENABLED */
969XMLPUBFUN int XMLCALL
970 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
971 const xmlChar *URL,
972 const xmlChar *ID,
973 xmlNodePtr *lst);
974
975/*
976 * Parser contexts handling.
977 */
978XMLPUBFUN xmlParserCtxtPtr XMLCALL
979 xmlNewParserCtxt (void);
980XMLPUBFUN int XMLCALL
981 xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
982XMLPUBFUN void XMLCALL
983 xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
984XMLPUBFUN void XMLCALL
985 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
986#ifdef LIBXML_SAX1_ENABLED
987XMLPUBFUN void XMLCALL
988 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
989 const xmlChar* buffer,
990 const char *filename);
991#endif /* LIBXML_SAX1_ENABLED */
992XMLPUBFUN xmlParserCtxtPtr XMLCALL
993 xmlCreateDocParserCtxt (const xmlChar *cur);
994
#ifdef LIBXML_LEGACY_ENABLED
/*
 * Reading/setting optional parsing features.
 *
 * Only declared when LIBXML_LEGACY_ENABLED is defined. Features are
 * addressed by name (a C string); values travel through untyped `void *`
 * pointers, so the caller must know the type that goes with each name.
 * All three functions return an int.
 *
 * NOTE(review): `len` in xmlGetFeaturesList() looks like an in/out count
 * for the `result` array - confirm against the implementation.
 */
XMLPUBFUN int XMLCALL
    xmlGetFeaturesList(int *len,
                       const char **result);
XMLPUBFUN int XMLCALL
    xmlGetFeature(xmlParserCtxtPtr ctxt,
                  const char *name,
                  void *result);
XMLPUBFUN int XMLCALL
    xmlSetFeature(xmlParserCtxtPtr ctxt,
                  const char *name,
                  void *value);
#endif /* LIBXML_LEGACY_ENABLED */
1011
#ifdef LIBXML_PUSH_ENABLED
/*
 * Interfaces for the Push mode.
 *
 * Only declared when LIBXML_PUSH_ENABLED is defined.
 *
 *   xmlCreatePushParserCtxt  takes a SAX handler with its user data, an
 *                            initial chunk of `size` bytes and a filename;
 *                            returns a parser context.
 *   xmlParseChunk            takes a context, a chunk of `size` bytes and
 *                            a `terminate` flag; returns an int.
 *
 * NOTE(review): presumably a non-zero `terminate` marks the last chunk -
 * confirm against the implementation.
 */
XMLPUBFUN xmlParserCtxtPtr XMLCALL
    xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
                            void *user_data,
                            const char *chunk,
                            int size,
                            const char *filename);
XMLPUBFUN int XMLCALL
    xmlParseChunk(xmlParserCtxtPtr ctxt,
                  const char *chunk,
                  int size,
                  int terminate);
#endif /* LIBXML_PUSH_ENABLED */
1028
1029/*
1030 * Special I/O mode.
1031 */
1032
1033XMLPUBFUN xmlParserCtxtPtr XMLCALL
1034 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax,
1035 void *user_data,
1036 xmlInputReadCallback ioread,
1037 xmlInputCloseCallback ioclose,
1038 void *ioctx,
1039 xmlCharEncoding enc);
1040
1041XMLPUBFUN xmlParserInputPtr XMLCALL
1042 xmlNewIOInputStream (xmlParserCtxtPtr ctxt,
1043 xmlParserInputBufferPtr input,
1044 xmlCharEncoding enc);
1045
1046/*
1047 * Node infos.
1048 */
1049XMLPUBFUN const xmlParserNodeInfo* XMLCALL
1050 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt,
1051 const xmlNodePtr node);
1052XMLPUBFUN void XMLCALL
1053 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1054XMLPUBFUN void XMLCALL
1055 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1056XMLPUBFUN unsigned long XMLCALL
1057 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1058 const xmlNodePtr node);
1059XMLPUBFUN void XMLCALL
1060 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
1061 const xmlParserNodeInfoPtr info);
1062
1063/*
1064 * External entities handling actually implemented in xmlIO.
1065 */
1066
1067XMLPUBFUN void XMLCALL
1068 xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
1069XMLPUBFUN xmlExternalEntityLoader XMLCALL
1070 xmlGetExternalEntityLoader(void);
1071XMLPUBFUN xmlParserInputPtr XMLCALL
1072 xmlLoadExternalEntity (const char *URL,
1073 const char *ID,
1074 xmlParserCtxtPtr ctxt);
1075
1076/*
1077 * Index lookup, actually implemented in the encoding module
1078 */
1079XMLPUBFUN long XMLCALL
1080 xmlByteConsumed (xmlParserCtxtPtr ctxt);
1081
1082/*
1083 * New set of simpler/more flexible APIs
1084 */
/**
 * xmlParserOption:
 *
 * This is the set of XML parser options that can be passed down
 * to the xmlReadDoc() and similar calls.
 *
 * Each enumerator occupies its own bit (1<<0 through 1<<22), so several
 * options can be combined with bitwise OR into a single int value.
 */
typedef enum {
    XML_PARSE_RECOVER    = 1<<0,  /* recover on errors */
    XML_PARSE_NOENT      = 1<<1,  /* substitute entities */
    XML_PARSE_DTDLOAD    = 1<<2,  /* load the external subset */
    XML_PARSE_DTDATTR    = 1<<3,  /* default DTD attributes */
    XML_PARSE_DTDVALID   = 1<<4,  /* validate with the DTD */
    XML_PARSE_NOERROR    = 1<<5,  /* suppress error reports */
    XML_PARSE_NOWARNING  = 1<<6,  /* suppress warning reports */
    XML_PARSE_PEDANTIC   = 1<<7,  /* pedantic error reporting */
    XML_PARSE_NOBLANKS   = 1<<8,  /* remove blank nodes */
    XML_PARSE_SAX1       = 1<<9,  /* use the SAX1 interface internally */
    XML_PARSE_XINCLUDE   = 1<<10, /* implement XInclude substitution */
    XML_PARSE_NONET      = 1<<11, /* forbid network access */
    XML_PARSE_NODICT     = 1<<12, /* do not reuse the context dictionary */
    XML_PARSE_NSCLEAN    = 1<<13, /* remove redundant namespace declarations */
    XML_PARSE_NOCDATA    = 1<<14, /* merge CDATA as text nodes */
    XML_PARSE_NOXINCNODE = 1<<15, /* do not generate XINCLUDE START/END nodes */
    XML_PARSE_COMPACT    = 1<<16, /* compact small text nodes; no modification
                                     of the tree allowed afterwards (will
                                     possibly crash if you try to modify the
                                     tree) */
    XML_PARSE_OLD10      = 1<<17, /* parse using XML-1.0 before update 5 */
    XML_PARSE_NOBASEFIX  = 1<<18, /* do not fixup XINCLUDE xml:base uris */
    XML_PARSE_HUGE       = 1<<19, /* relax any hardcoded limit from the parser */
    XML_PARSE_OLDSAX     = 1<<20, /* parse using SAX2 interface before 2.7.0 */
    XML_PARSE_IGNORE_ENC = 1<<21, /* ignore internal document encoding hint */
    XML_PARSE_BIG_LINES  = 1<<22  /* store big line numbers in text PSVI field */
} xmlParserOption;
1118
1119XMLPUBFUN void XMLCALL
1120 xmlCtxtReset (xmlParserCtxtPtr ctxt);
1121XMLPUBFUN int XMLCALL
1122 xmlCtxtResetPush (xmlParserCtxtPtr ctxt,
1123 const char *chunk,
1124 int size,
1125 const char *filename,
1126 const char *encoding);
1127XMLPUBFUN int XMLCALL
1128 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt,
1129 int options);
1130XMLPUBFUN xmlDocPtr XMLCALL
1131 xmlReadDoc (const xmlChar *cur,
1132 const char *URL,
1133 const char *encoding,
1134 int options);
1135XMLPUBFUN xmlDocPtr XMLCALL
1136 xmlReadFile (const char *URL,
1137 const char *encoding,
1138 int options);
1139XMLPUBFUN xmlDocPtr XMLCALL
1140 xmlReadMemory (const char *buffer,
1141 int size,
1142 const char *URL,
1143 const char *encoding,
1144 int options);
1145XMLPUBFUN xmlDocPtr XMLCALL
1146 xmlReadFd (int fd,
1147 const char *URL,
1148 const char *encoding,
1149 int options);
1150XMLPUBFUN xmlDocPtr XMLCALL
1151 xmlReadIO (xmlInputReadCallback ioread,
1152 xmlInputCloseCallback ioclose,
1153 void *ioctx,
1154 const char *URL,
1155 const char *encoding,
1156 int options);
1157XMLPUBFUN xmlDocPtr XMLCALL
1158 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
1159 const xmlChar *cur,
1160 const char *URL,
1161 const char *encoding,
1162 int options);
1163XMLPUBFUN xmlDocPtr XMLCALL
1164 xmlCtxtReadFile (xmlParserCtxtPtr ctxt,
1165 const char *filename,
1166 const char *encoding,
1167 int options);
1168XMLPUBFUN xmlDocPtr XMLCALL
1169 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
1170 const char *buffer,
1171 int size,
1172 const char *URL,
1173 const char *encoding,
1174 int options);
1175XMLPUBFUN xmlDocPtr XMLCALL
1176 xmlCtxtReadFd (xmlParserCtxtPtr ctxt,
1177 int fd,
1178 const char *URL,
1179 const char *encoding,
1180 int options);
1181XMLPUBFUN xmlDocPtr XMLCALL
1182 xmlCtxtReadIO (xmlParserCtxtPtr ctxt,
1183 xmlInputReadCallback ioread,
1184 xmlInputCloseCallback ioclose,
1185 void *ioctx,
1186 const char *URL,
1187 const char *encoding,
1188 int options);
1189
/*
 * Library wide options
 */
/**
 * xmlFeature:
 *
 * Used to examine the existence of features that can be enabled
 * or disabled at compile-time.
 * They used to be called XML_FEATURE_xxx but this clashed with Expat.
 *
 * Values are consecutive identifiers (1..33), not bit flags, so they
 * must not be OR-ed together.
 */
typedef enum {
    XML_WITH_THREAD     = 1,
    XML_WITH_TREE       = 2,
    XML_WITH_OUTPUT     = 3,
    XML_WITH_PUSH       = 4,
    XML_WITH_READER     = 5,
    XML_WITH_PATTERN    = 6,
    XML_WITH_WRITER     = 7,
    XML_WITH_SAX1       = 8,
    XML_WITH_FTP        = 9,
    XML_WITH_HTTP       = 10,
    XML_WITH_VALID      = 11,
    XML_WITH_HTML       = 12,
    XML_WITH_LEGACY     = 13,
    XML_WITH_C14N       = 14,
    XML_WITH_CATALOG    = 15,
    XML_WITH_XPATH      = 16,
    XML_WITH_XPTR       = 17,
    XML_WITH_XINCLUDE   = 18,
    XML_WITH_ICONV      = 19,
    XML_WITH_ISO8859X   = 20,
    XML_WITH_UNICODE    = 21,
    XML_WITH_REGEXP     = 22,
    XML_WITH_AUTOMATA   = 23,
    XML_WITH_EXPR       = 24,
    XML_WITH_SCHEMAS    = 25,
    XML_WITH_SCHEMATRON = 26,
    XML_WITH_MODULES    = 27,
    XML_WITH_DEBUG      = 28,
    XML_WITH_DEBUG_MEM  = 29,
    XML_WITH_DEBUG_RUN  = 30,
    XML_WITH_ZLIB       = 31,
    XML_WITH_ICU        = 32,
    XML_WITH_LZMA       = 33,
    XML_WITH_NONE       = 99999 /* just to be sure of allocation size */
} xmlFeature;
1236
1237XMLPUBFUN int XMLCALL
1238 xmlHasFeature (xmlFeature feature);
1239
1240#ifdef __cplusplus
1241}
1242#endif
1243#endif /* __XML_PARSER_H__ */
1244

/* source code of include/libxml2/libxml/parser.h */