• 用xerces-c来进行xml schema校验


    在xerces-c的官方站点上有文章指引说明是怎样进行xml schema校验。

    http://xerces.apache.org/xerces-c/schema-3.html

    给出的样例代码:

    // Instantiate the DOM parser.
    XercesDOMParser parser;
    // Request namespace and schema processing before parsing.
    // NOTE: this sample configures neither a validation scheme nor an error handler.
    parser.setDoNamespaces(true);
    parser.setDoSchema(true);
    // Parse the document; xmlFile is defined outside this excerpt.
    parser.parse(xmlFile);
    

    但是,这段样例代码根本不起任何作用。

    在调用XercesDOMParser::parse之前,还有两件事情要做:

    1.调用XercesDOMParser::setValidationScheme来设置校验方案(下面两种写法任选其一)

    parser.setValidationScheme( XercesDOMParser::Val_Auto);

    parser.setValidationScheme( XercesDOMParser::Val_Always);

    2.调用XercesDOMParser::setErrorHandler,其中参数必须是指向ErrorHandler类或其子类对象的指针。


    看以下样例

    address.xml:

    <?xml version="1.0" encoding="utf-8"?>

    <Address xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="address.xsd">
    	<Recipient>Mr. Walter C. Brown</Recipient>
    	<House>good</House>
    	<Street>Featherstone Street</Street>
    	<Town>LONDON</Town>
    	<PostCode>EC1Y 8SY</PostCode>
    	<Country>UK</Country>
    </Address>


    address.xsd:

    <?xml version="1.0" encoding="utf-8"?>
    <xs:schema elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
    	<xs:element name="Address">
    		<xs:complexType>
    			<xs:sequence>
    				<xs:element name="Recipient" type="xs:string" />
    				<xs:element name="House" type="xs:string" />
    				<xs:element name="Street" type="xs:string" />
    				<xs:element name="Town" type="xs:string" />
    				<xs:element name="County" type="xs:string" minOccurs="0" />
    				<xs:element name="PostCode" type="xs:unsignedInt" />
    				<xs:element name="Country" minOccurs="0">
    					<xs:simpleType>
    						<xs:restriction base="xs:string">
    							<xs:enumeration value="IN" />
    							<xs:enumeration value="DE" />
    							<xs:enumeration value="ES" />
    							<xs:enumeration value="UK" />
    							<xs:enumeration value="US" />
    						</xs:restriction>
    					</xs:simpleType>
    				</xs:element>
    			</xs:sequence>
    		</xs:complexType>
    	</xs:element>
    </xs:schema>


    new_address.cpp:

    #include <stdio.h>
    #include <xercesc/parsers/XercesDOMParser.hpp>
    #include <xercesc/sax/SAXException.hpp>
    #include <xercesc/dom/DOMException.hpp>
    #include <xercesc/dom/DOMElement.hpp>
    #include <xercesc/dom/DOMLSException.hpp>
    #include <xercesc/sax2/DefaultHandler.hpp>
    
    using namespace XERCES_CPP_NAMESPACE;
    
    /**
     * Error handler that prints every diagnostic the parser reports to stdout.
     *
     * Each callback prints one line of the form
     *   "<severity> in line:<line>, col:<column>, <message>"
     * and returns normally, so the callbacks themselves never throw.
     */
    class SchemaErrorHandler : public DefaultHandler
    {
    public:
        SchemaErrorHandler() {}
        ~SchemaErrorHandler() {}

        /** Prints a "warn" line for the given parse exception. */
        void warning(const SAXParseException& exc)
        {
            report("warn", exc);
        }

        /** Prints an "error" line for the given parse exception. */
        void error(const SAXParseException& exc)
        {
            report("error", exc);
        }

        /** Prints a "fatal" line for the given parse exception. */
        void fatalError(const SAXParseException& exc)
        {
            report("fatal", exc);
        }

        /** Prints a marker line whenever the handler is asked to reset. */
        void resetErrors()
        {
            printf("nothing\n");
        }

    private:
        /**
         * Shared formatter for the three severity callbacks.
         *
         * @param severity label printed at the start of the line.
         * @param exc      exception carrying the position and message.
         */
        static void report(const char* severity, const SAXParseException& exc)
        {
            // transcode() allocates a new buffer that the caller owns;
            // it must be handed back through XMLString::release().
            char* text = XMLString::transcode(exc.getMessage());

            // The position type is a library typedef whose width varies by
            // platform, so widen it explicitly to match the format string.
            printf("%s in line:%llu, col:%llu, %s\n",
                   severity,
                   static_cast<unsigned long long>(exc.getLineNumber()),
                   static_cast<unsigned long long>(exc.getColumnNumber()),
                   text ? text : "");

            if (text)
            {
                XMLString::release(&text);
            }
        }
    };
    
    int main(int argc, char* argv[] )
    {
    	if ( argc < 2 )
    	{
    		printf( "must specify a file
    " );
    		return -1;
    	}
    
    	XMLPlatformUtils::Initialize();
    
    	XercesDOMParser parser;
    	SchemaErrorHandler handler;
    	try
    	{
    		parser.setErrorHandler( &handler );
    		parser.setDoNamespaces(true);
    		parser.setDoSchema(true);
    		//parser.setValidationScheme( XercesDOMParser::Val_Auto);
    		parser.parse( argv[1] );
    	} catch ( SAXException& e )
    	{
    		printf( "msg:%s
    ", XMLString::transcode(e.getMessage() ) );
    		return -2;
    	}
    	catch ( XMLException& e )
    	{
    		printf( "code:%d, msg:%s
    ", e.getCode(), XMLString::transcode( e.getMessage() ) );
    		return -3;
    	}
    	catch (	DOMException& e )
    	{
    		printf( "code:%d, msg:%s
    ", e.code, e.msg );
    		return -4;
    	}
    
    	return 0;
    }
    可以看到,这里的代码注释掉了这一行:

    //parser.setValidationScheme( XercesDOMParser::Val_Auto);


    编译执行:

    [xuzhina@localhost sample]$ g++ -g -o new_address new_address.cpp -lxerces-c
    [xuzhina@localhost sample]$ ./new_address address.xml 
    [xuzhina@localhost sample]$ 

    //parser.setValidationScheme( XercesDOMParser::Val_Auto);
    打开(取消注释),但注释掉

    parser.setErrorHandler( &handler );
    编译执行:

    [xuzhina@localhost sample]$ g++ -g -o new_address new_address.cpp -lxerces-c
    [xuzhina@localhost sample]$ ./new_address address.xml 
    [xuzhina@localhost sample]$ 

    parser.setErrorHandler( &handler );

    打开,编译执行:

    [xuzhina@localhost sample]$ ./new_address address.xml 
    error in line:8, col:31, value 'EC1Y 8SY' does not match regular expression facet '[+-]?[0-9]+'

    执行一下xmllint,对照一下结果:

    [xuzhina@localhost sample]$ xmllint --schema address.xsd address.xml
    <?xml version="1.0" encoding="utf-8"?>
    <Address xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="address.xsd">
    	<Recipient>Mr. Walter C. Brown</Recipient>
    	<House>good</House>
    	<Street>Featherstone Street</Street>
    	<Town>LONDON</Town>
    	<PostCode>EC1Y 8SY</PostCode>
    	<Country>UK</Country>
    </Address>
    address.xml:8: element PostCode: Schemas validity error : Element 'PostCode': 'EC1Y 8SY' is not a valid value of the atomic type 'xs:unsignedInt'.
    address.xml fails to validate

    PS:

    在xml schema中,xs:string类型可以容纳其他类型的字面值。比如在House标签的内容里写上数字(例如49),无论是xmllint还是xerces,都不会报这个标签的值有问题。

    曾经为这个问题折腾了一个下午。

  • 相关阅读:
    如何根据二叉树 前序遍历 中序遍历 后序遍历 中的两种遍历来反推另一种遍历
    dijkstral改编
    纪念做出来的第一道计算几何题
    链式前向星
    一道简单树形dp
    算法进阶指南—特殊排序
    算法进阶指南二分章节的两道题
    秦皇岛winter camp 总结
    C
    一道cf水题
  • 原文地址:https://www.cnblogs.com/yxwkf/p/5161291.html
Copyright © 2020-2023  润新知