HEX
Server: Apache
System: Linux 162-240-236-42.bluehost.com 3.10.0-1160.114.2.el7.x86_64 #1 SMP Wed Mar 20 15:54:52 UTC 2024 x86_64
User: bt667 (1004)
PHP: 8.2.29
Disabled: NONE
Upload Files
File: //opt/cpanel/ea-libxml2/include/libxml2/libxml/HTMLparser.h
/**
 * @file
 * 
 * @brief HTML parser, doesn't support HTML5
 * 
 * This module orginally implemented an HTML parser based on the
 * (underspecified) HTML 4.0 spec. As of 2.14, the tokenizer
 * conforms to HTML5. Tree construction still follows a custom,
 * unspecified algorithm with many differences to HTML5.
 *
 * The parser defaults to ISO-8859-1, the default encoding of
 * HTTP/1.0.
 *
 * @copyright See Copyright for the status of this software.
 *
 * @author Daniel Veillard
 */

#ifndef __HTML_PARSER_H__
#define __HTML_PARSER_H__
#include <libxml/xmlversion.h>
#include <libxml/parser.h>

#ifdef LIBXML_HTML_ENABLED

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Backward compatibility
 */
#define UTF8ToHtml htmlUTF8ToHtml
#define htmlDefaultSubelement(elt) elt->defaultsubelt
#define htmlElementAllowedHereDesc(parent,elt) \
	htmlElementAllowedHere((parent), (elt)->name)
#define htmlRequiredAttrs(elt) (elt)->attrs_req

/*
 * Most of the back-end structures from XML and HTML are shared.
 */
/** Same as xmlParserCtxt */
typedef xmlParserCtxt htmlParserCtxt;
typedef xmlParserCtxtPtr htmlParserCtxtPtr;
typedef xmlParserNodeInfo htmlParserNodeInfo;
/** Same as xmlSAXHandler */
typedef xmlSAXHandler htmlSAXHandler;
typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
/** Same as xmlParserInput */
typedef xmlParserInput htmlParserInput;
typedef xmlParserInputPtr htmlParserInputPtr;
typedef xmlDocPtr htmlDocPtr;
typedef xmlNodePtr htmlNodePtr;

/** @cond ignore */

/*
 * Internal description of an HTML element, representing HTML 4.01
 * and XHTML 1.0 (which share the same structure).
 */
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
    const char *name;	/* The tag name */
    char startTag;      /* unused */
    char endTag;        /* Whether the end tag can be implied */
    char saveEndTag;    /* unused */
    char empty;         /* Is this an empty element ? */
    char depr;          /* unused */
    char dtd;           /* unused */
    char isinline;      /* is this a block 0 or inline 1 element */
    const char *desc;   /* the description */

    const char** subelts XML_DEPRECATED_MEMBER;
    const char* defaultsubelt XML_DEPRECATED_MEMBER;
    const char** attrs_opt XML_DEPRECATED_MEMBER;
    const char** attrs_depr XML_DEPRECATED_MEMBER;
    const char** attrs_req XML_DEPRECATED_MEMBER;

    int dataMode;
};

/*
 * Internal description of an HTML entity.
 */
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
struct _htmlEntityDesc {
    unsigned int value;	/* the UNICODE value for the character */
    const char *name;	/* The entity name */
    const char *desc;   /* the description */
};

#ifdef LIBXML_SAX1_ENABLED
/**
 * @deprecated Use #xmlSAX2InitHtmlDefaultSAXHandler
 */
XML_DEPRECATED
XMLPUBVAR const xmlSAXHandlerV1 htmlDefaultSAXHandler;
#endif /* LIBXML_SAX1_ENABLED */

/** @endcond */

/*
 * There is only few public functions.
 */
XML_DEPRECATED
XMLPUBFUN void
			htmlInitAutoClose	(void);
XML_DEPRECATED
XMLPUBFUN const htmlElemDesc *
			htmlTagLookup	(const xmlChar *tag);
XML_DEPRECATED
XMLPUBFUN const htmlEntityDesc *
			htmlEntityLookup(const xmlChar *name);
XML_DEPRECATED
XMLPUBFUN const htmlEntityDesc *
			htmlEntityValueLookup(unsigned int value);

XML_DEPRECATED
XMLPUBFUN int
			htmlIsAutoClosed(xmlDoc *doc,
					 xmlNode *elem);
XML_DEPRECATED
XMLPUBFUN int
			htmlAutoCloseTag(xmlDoc *doc,
					 const xmlChar *name,
					 xmlNode *elem);
XML_DEPRECATED
XMLPUBFUN const htmlEntityDesc *
			htmlParseEntityRef(htmlParserCtxt *ctxt,
					 const xmlChar **str);
XML_DEPRECATED
XMLPUBFUN int
			htmlParseCharRef(htmlParserCtxt *ctxt);
XML_DEPRECATED
XMLPUBFUN void
			htmlParseElement(htmlParserCtxt *ctxt);

XMLPUBFUN htmlParserCtxt *
			htmlNewParserCtxt(void);
XMLPUBFUN htmlParserCtxt *
			htmlNewSAXParserCtxt(const htmlSAXHandler *sax,
					     void *userData);

XMLPUBFUN htmlParserCtxt *
			htmlCreateMemoryParserCtxt(const char *buffer,
						   int size);

XMLPUBFUN int
			htmlParseDocument(htmlParserCtxt *ctxt);
XML_DEPRECATED
XMLPUBFUN xmlDoc *
			htmlSAXParseDoc	(const xmlChar *cur,
					 const char *encoding,
					 htmlSAXHandler *sax,
					 void *userData);
XMLPUBFUN xmlDoc *
			htmlParseDoc	(const xmlChar *cur,
					 const char *encoding);
XMLPUBFUN htmlParserCtxt *
			htmlCreateFileParserCtxt(const char *filename,
	                                         const char *encoding);
XML_DEPRECATED
XMLPUBFUN xmlDoc *
			htmlSAXParseFile(const char *filename,
					 const char *encoding,
					 htmlSAXHandler *sax,
					 void *userData);
XMLPUBFUN xmlDoc *
			htmlParseFile	(const char *filename,
					 const char *encoding);
XML_DEPRECATED
XMLPUBFUN int
			htmlUTF8ToHtml	(unsigned char *out,
					 int *outlen,
					 const unsigned char *in,
					 int *inlen);
XML_DEPRECATED
XMLPUBFUN int
			htmlEncodeEntities(unsigned char *out,
					 int *outlen,
					 const unsigned char *in,
					 int *inlen, int quoteChar);
XML_DEPRECATED
XMLPUBFUN int
			htmlIsScriptAttribute(const xmlChar *name);
XML_DEPRECATED
XMLPUBFUN int
			htmlHandleOmittedElem(int val);

#ifdef LIBXML_PUSH_ENABLED
/*
 * Interfaces for the Push mode.
 */
XMLPUBFUN htmlParserCtxt *
			htmlCreatePushParserCtxt(htmlSAXHandler *sax,
						 void *user_data,
						 const char *chunk,
						 int size,
						 const char *filename,
						 xmlCharEncoding enc);
XMLPUBFUN int
			htmlParseChunk		(htmlParserCtxt *ctxt,
						 const char *chunk,
						 int size,
						 int terminate);
#endif /* LIBXML_PUSH_ENABLED */

XMLPUBFUN void
			htmlFreeParserCtxt	(htmlParserCtxt *ctxt);

/*
 * New set of simpler/more flexible APIs
 */

/**
 * This is the set of HTML parser options that can be passed to
 * #htmlReadDoc, #htmlCtxtSetOptions and other functions.
 */
typedef enum {
    /**
     * No effect as of 2.14.0.
     */
    HTML_PARSE_RECOVER = 1<<0,
    /**
     * Do not default to a doctype if none was found.
     */
    HTML_PARSE_NODEFDTD = 1<<2,
    /**
     * Disable error and warning reports to the error handlers.
     * Errors are still accessible with xmlCtxtGetLastError().
     */
    HTML_PARSE_NOERROR = 1<<5,
    /**
     * Disable warning reports.
     */
    HTML_PARSE_NOWARNING = 1<<6,
    /**
     * No effect.
     */
    HTML_PARSE_PEDANTIC = 1<<7,
    /**
     * Remove some text nodes containing only whitespace from the
     * result document. Which nodes are removed depends on a conservative
     * heuristic. The reindenting feature of the serialization code relies
     * on this option to be set when parsing. Use of this option is
     * DISCOURAGED.
     */
    HTML_PARSE_NOBLANKS = 1<<8,
    /**
     * No effect.
     */
    HTML_PARSE_NONET = 1<<11,
    /**
     * Do not add implied html, head or body elements.
     */
    HTML_PARSE_NOIMPLIED = 1<<13,
    /**
     * Store small strings directly in the node struct to save
     * memory.
    */
    HTML_PARSE_COMPACT = 1<<16,
    /**
     * Relax some internal limits. See XML_PARSE_HUGE in xmlParserOption.
     *
     * @since 2.14.0
     *
     * Use XML_PARSE_HUGE with older versions.
     */
    HTML_PARSE_HUGE = 1<<19,
    /**
     * Ignore the encoding in the HTML declaration. This option is
     * mostly unneeded these days. The only effect is to enforce
     * ISO-8859-1 decoding of ASCII-like data.
     */
    HTML_PARSE_IGNORE_ENC =1<<21,
    /**
     * Enable reporting of line numbers larger than 65535.
     *
     * @since 2.14.0
     *
     * Use XML_PARSE_BIG_LINES with older versions.
     */
    HTML_PARSE_BIG_LINES = 1<<22,
    /**
     * Make the tokenizer emit a SAX callback for each token. This results
     * in unbalanced invocations of startElement and endElement.
     *
     * For now, this is only usable to tokenize HTML5 with custom SAX
     * callbacks. A tree builder isn't implemented yet.
     *
     * @since 2.14.0
    */
    HTML_PARSE_HTML5 = 1<<26
} htmlParserOption;

XMLPUBFUN void
		htmlCtxtReset		(htmlParserCtxt *ctxt);
XMLPUBFUN int
		htmlCtxtSetOptions	(htmlParserCtxt *ctxt,
					 int options);
XMLPUBFUN int
		htmlCtxtUseOptions	(htmlParserCtxt *ctxt,
					 int options);
XMLPUBFUN xmlDoc *
		htmlReadDoc		(const xmlChar *cur,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlReadFile		(const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlReadMemory		(const char *buffer,
					 int size,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlReadFd		(int fd,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlReadIO		(xmlInputReadCallback ioread,
					 xmlInputCloseCallback ioclose,
					 void *ioctx,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlCtxtParseDocument	(htmlParserCtxt *ctxt,
					 xmlParserInput *input);
XMLPUBFUN xmlDoc *
		htmlCtxtReadDoc		(xmlParserCtxt *ctxt,
					 const xmlChar *cur,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlCtxtReadFile		(xmlParserCtxt *ctxt,
					 const char *filename,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlCtxtReadMemory		(xmlParserCtxt *ctxt,
					 const char *buffer,
					 int size,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlCtxtReadFd		(xmlParserCtxt *ctxt,
					 int fd,
					 const char *URL,
					 const char *encoding,
					 int options);
XMLPUBFUN xmlDoc *
		htmlCtxtReadIO		(xmlParserCtxt *ctxt,
					 xmlInputReadCallback ioread,
					 xmlInputCloseCallback ioclose,
					 void *ioctx,
					 const char *URL,
					 const char *encoding,
					 int options);

/**
 * deprecated content model
 */
typedef enum {
  HTML_NA = 0 ,		/* something we don't check at all */
  HTML_INVALID = 0x1 ,
  HTML_DEPRECATED = 0x2 ,
  HTML_VALID = 0x4 ,
  HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */
} htmlStatus ;

/* Using htmlElemDesc rather than name here, to emphasise the fact
   that otherwise there's a lookup overhead
*/
XML_DEPRECATED
XMLPUBFUN htmlStatus htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
XML_DEPRECATED
XMLPUBFUN int htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
XML_DEPRECATED
XMLPUBFUN htmlStatus htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
XML_DEPRECATED
XMLPUBFUN htmlStatus htmlNodeStatus(xmlNode *, int) ;

#ifdef __cplusplus
}
#endif

#endif /* LIBXML_HTML_ENABLED */
#endif /* __HTML_PARSER_H__ */