上文分析了jackrabbit对富文档的文本抽取实现的源码,我们注意到,上文的文本抽取类LazyTextExtractorField继承自lucene的AbstractField类,即LazyTextExtractorField类是Field类型的,这正是构建lucene索引Document里面的Field对象,那么jackrabbit是怎样构建Document对象的呢,这里面jackrabbit用到了一个叫NodeIndexer类,专门用来构建lucene的Document对象的,NodeIndexer类的源码如下:
/** * Creates a lucene <code>Document</code> object from a {@link javax.jcr.Node}. */ public class NodeIndexer { /** * The logger instance for this class. */ private static final Logger log = LoggerFactory.getLogger(NodeIndexer.class); /** * The default boost for a lucene field: 1.0f. */ protected static final float DEFAULT_BOOST = 1.0f; /** * The <code>NodeState</code> of the node to index */ protected final NodeState node; /** * The persistent item state provider */ protected final ItemStateManager stateProvider; /** * Namespace mappings to use for indexing. This is the internal * namespace mapping. */ protected final NamespaceMappings mappings; /** * Name and Path resolver. */ protected final NamePathResolver resolver; /** * Background task executor used for full text extraction. */ private final Executor executor; /** * Parser used for extracting text content from binary properties * for full text indexing. */ private final Parser parser; /** * The indexing configuration or <code>null</code> if none is available. */ protected IndexingConfiguration indexingConfig; /** * If set to <code>true</code> the fulltext field is stored and and a term * vector is created with offset information. */ protected boolean supportHighlighting = false; /** * Indicates index format for this node indexer. */ protected IndexFormatVersion indexFormatVersion = IndexFormatVersion.V1; /** * List of {@link FieldNames#FULLTEXT} fields which should not be used in * an excerpt. */ protected List<Fieldable> doNotUseInExcerpt = new ArrayList<Fieldable>(); /** * The maximum number of characters to extract from binaries. */ private int maxExtractLength = Integer.MAX_VALUE; /** * Creates a new node indexer. * * @param node the node state to index. * @param stateProvider the persistent item state manager to retrieve properties. * @param mappings internal namespace mappings. * @param executor background task executor for text extraction * @param parser parser for binary properties */ public NodeIndexer( NodeState node, ItemStateManager stateProvider, NamespaceMappings mappings, Executor executor, Parser parser) { this.node = node; this.stateProvider = stateProvider; this.mappings = mappings; this.resolver = NamePathResolverImpl.create(mappings); this.executor = executor; this.parser = parser; } /** * Returns the <code>NodeId</code> of the indexed node. * @return the <code>NodeId</code> of the indexed node. */ public NodeId getNodeId() { return node.getNodeId(); } /** * If set to <code>true</code> additional information is stored in the index * to support highlighting using the rep:excerpt pseudo property. * * @param b <code>true</code> to enable highlighting support. */ public void setSupportHighlighting(boolean b) { supportHighlighting = b; } /** * Sets the index format version * * @param indexFormatVersion the index format version */ public void setIndexFormatVersion(IndexFormatVersion indexFormatVersion) { this.indexFormatVersion = indexFormatVersion; } /** * Sets the indexing configuration for this node indexer. * * @param config the indexing configuration. */ public void setIndexingConfiguration(IndexingConfiguration config) { this.indexingConfig = config; } /** * Returns the maximum number of characters to extract from binaries. * * @return maximum extraction length */ public int getMaxExtractLength() { return maxExtractLength; } /** * Sets the maximum number of characters to extract from binaries. * * @param length maximum extraction length */ public void setMaxExtractLength(int length) { this.maxExtractLength = length; } /** * Creates a lucene Document. * * @return the lucene Document with the index layout. * @throws RepositoryException if an error occurs while reading property * values from the <code>ItemStateProvider</code>. */ public Document createDoc() throws RepositoryException { doNotUseInExcerpt.clear(); Document doc = new Document(); doc.setBoost(getNodeBoost()); // special fields // UUID doc.add(new IDField(node.getNodeId())); try { // parent UUID if (node.getParentId() == null) { // root node doc.add(new Field(FieldNames.PARENT, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); addNodeName(doc, "", ""); } else if (node.getSharedSet().isEmpty()) { addParentChildRelation(doc, node.getParentId()); } else { // shareable node for (NodeId id : node.getSharedSet()) { addParentChildRelation(doc, id); } // mark shareable nodes doc.add(new Field(FieldNames.SHAREABLE_NODE, "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); } } catch (NoSuchItemStateException e) { throwRepositoryException(e); } catch (ItemStateException e) { throwRepositoryException(e); } catch (NamespaceException e) { // will never happen, because this.mappings will dynamically add // unknown uri<->prefix mappings } Set<Name> props = node.getPropertyNames(); for (Name propName : props) { PropertyId id = new PropertyId(node.getNodeId(), propName); try { PropertyState propState = (PropertyState) stateProvider.getItemState(id); // add each property to the _PROPERTIES_SET for searching // beginning with V2 if (indexFormatVersion.getVersion() >= IndexFormatVersion.V2.getVersion()) { addPropertyName(doc, propState.getName()); } InternalValue[] values = propState.getValues(); for (InternalValue value : values) { addValue(doc, value, propState.getName()); } if (values.length > 1) { // real multi-valued addMVPName(doc, propState.getName()); } } catch (NoSuchItemStateException e) { throwRepositoryException(e); } catch (ItemStateException e) { throwRepositoryException(e); } } // now add fields that are not used in excerpt (must go at the end) for (Fieldable field : doNotUseInExcerpt) { doc.add(field); } return doc; } /** * Wraps the exception <code>e</code> into a <code>RepositoryException</code> * and throws the created exception. * * @param e the base exception. */ protected void throwRepositoryException(Exception e) throws RepositoryException { String msg = "Error while indexing node: " + node.getNodeId() + " of " + "type: " + node.getNodeTypeName(); throw new RepositoryException(msg, e); } /** * Adds a {@link FieldNames#MVP} field to <code>doc</code> with the resolved * <code>name</code> using the internal search index namespace mapping. * * @param doc the lucene document. * @param name the name of the multi-value property. */ protected void addMVPName(Document doc, Name name) { try { String propName = resolver.getJCRName(name); doc.add(new Field(FieldNames.MVP, propName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO)); } catch (NamespaceException e) { // will never happen, prefixes are created dynamically } } /** * Adds a value to the lucene Document. * * @param doc the document. * @param value the internal jackrabbit value. * @param name the name of the property. */ protected void addValue(Document doc, InternalValue value, Name name) throws RepositoryException { String fieldName = name.getLocalName(); try { fieldName = resolver.getJCRName(name); } catch (NamespaceException e) { // will never happen } switch (value.getType()) { case PropertyType.BINARY: if (isIndexed(name)) { addBinaryValue(doc, fieldName, value); } break; case PropertyType.BOOLEAN: if (isIndexed(name)) { addBooleanValue(doc, fieldName, value.getBoolean()); } break; case PropertyType.DATE: if (isIndexed(name)) { addCalendarValue(doc, fieldName, value.getDate()); } break; case PropertyType.DOUBLE: if (isIndexed(name)) { addDoubleValue(doc, fieldName, value.getDouble()); } break; case PropertyType.LONG: if (isIndexed(name)) { addLongValue(doc, fieldName, value.getLong()); } break; case PropertyType.REFERENCE: if (isIndexed(name)) { addReferenceValue(doc, fieldName, value.getNodeId(), false); } break; case PropertyType.WEAKREFERENCE: if (isIndexed(name)) { addReferenceValue(doc, fieldName, value.getNodeId(), true); } break; case PropertyType.PATH: if (isIndexed(name)) { addPathValue(doc, fieldName, value.getPath()); } break; case PropertyType.URI: if (isIndexed(name)) { addURIValue(doc, fieldName, value.getURI()); } break; case PropertyType.STRING: if (isIndexed(name)) { // never fulltext index jcr:uuid String if (name.equals(NameConstants.JCR_UUID)) { addStringValue(doc, fieldName, value.getString(), false, false, DEFAULT_BOOST); } else { addStringValue(doc, fieldName, value.getString(), true, isIncludedInNodeIndex(name), getPropertyBoost(name), useInExcerpt(name)); } } break; case PropertyType.NAME: // jcr:primaryType and jcr:mixinTypes are required for correct // node type resolution in queries if (name.equals(NameConstants.JCR_PRIMARYTYPE) || name.equals(NameConstants.JCR_MIXINTYPES) || isIndexed(name)) { addNameValue(doc, fieldName, value.getName()); } break; case PropertyType.DECIMAL: if (isIndexed(name)) { addDecimalValue(doc, fieldName, value.getDecimal()); } break; default: throw new IllegalArgumentException("illegal internal value type: " + value.getType()); } // add length if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion()) { addLength(doc, fieldName, value); } } /** * Adds the property name to the lucene _:PROPERTIES_SET field. * * @param doc the document. * @param name the name of the property. */ protected void addPropertyName(Document doc, Name name) { String fieldName = name.getLocalName(); try { fieldName = resolver.getJCRName(name); } catch (NamespaceException e) { // will never happen } doc.add(new Field(FieldNames.PROPERTIES_SET, fieldName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); } /** * Adds the binary value to the document as the named field. * <p/> * This implementation checks if this {@link #node} is of type nt:resource * and if that is the case, tries to extract text from the binary property * using the {@link #extractor}. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. */ protected void addBinaryValue(Document doc, String fieldName, InternalValue internalValue) { // 'check' if node is of type nt:resource try { String jcrData = mappings.getPrefix(Name.NS_JCR_URI) + ":data"; if (!jcrData.equals(fieldName)) { // don't know how to index return; } InternalValue type = getValue(NameConstants.JCR_MIMETYPE); if (type != null) { Metadata metadata = new Metadata(); metadata.set(Metadata.CONTENT_TYPE, type.getString()); // jcr:encoding is not mandatory InternalValue encoding = getValue(NameConstants.JCR_ENCODING); if (encoding != null) { metadata.set( Metadata.CONTENT_ENCODING, encoding.getString()); } doc.add(createFulltextField(internalValue, metadata)); } } catch (Throwable t) { // TODO: How to recover from a transient indexing failure? log.warn("Exception while indexing binary property", t); } } /** * Utility method that extracts the first value of the named property * of the current node. Returns <code>null</code> if the property does * not exist or contains no values. * * @param name property name * @return value of the named property, or <code>null</code> * @throws ItemStateException if the property can not be accessed */ protected InternalValue getValue(Name name) throws ItemStateException { try { PropertyId id = new PropertyId(node.getNodeId(), name); PropertyState property = (PropertyState) stateProvider.getItemState(id); InternalValue[] values = property.getValues(); if (values.length > 0) { return values[0]; } else { return null; } } catch (NoSuchItemStateException e) { return null; } } /** * Adds the string representation of the boolean value to the document as * the named field. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. */ protected void addBooleanValue(Document doc, String fieldName, Object internalValue) { doc.add(createFieldWithoutNorms(fieldName, internalValue.toString(), PropertyType.BOOLEAN)); } /** * Creates a field of name <code>fieldName</code> with the value of <code> * internalValue</code>. The created field is indexed without norms. * * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. * @param propertyType the property type. */ protected Field createFieldWithoutNorms(String fieldName, String internalValue, int propertyType) { if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion()) { Field field = new Field(FieldNames.PROPERTIES, new SingletonTokenStream( FieldNames.createNamedValue(fieldName, internalValue), propertyType) ); field.setOmitNorms(true); return field; } else { return new Field(FieldNames.PROPERTIES, FieldNames.createNamedValue(fieldName, internalValue), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO); } } /** * Adds the calendar value to the document as the named field. The calendar * value is converted to an indexable string value using the * {@link DateField} class. * * @param doc * The document to which to add the field * @param fieldName * The name of the field to add * @param internalValue * The value for the field to add to the document. */ protected void addCalendarValue(Document doc, String fieldName, Object internalValue) { Calendar value = (Calendar) internalValue; long millis = value.getTimeInMillis(); try { doc.add(createFieldWithoutNorms(fieldName, DateField.timeToString(millis), PropertyType.DATE)); } catch (IllegalArgumentException e) { log.warn("'{}' is outside of supported date value range.", new Date(value.getTimeInMillis())); } } /** * Adds the double value to the document as the named field. The double * value is converted to an indexable string value using the * {@link DoubleField} class. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. */ protected void addDoubleValue(Document doc, String fieldName, Object internalValue) { double doubleVal = (Double) internalValue; doc.add(createFieldWithoutNorms(fieldName, DoubleField.doubleToString(doubleVal), PropertyType.DOUBLE)); } /** * Adds the long value to the document as the named field. The long * value is converted to an indexable string value using the {@link LongField} * class. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. */ protected void addLongValue(Document doc, String fieldName, Object internalValue) { long longVal = (Long) internalValue; doc.add(createFieldWithoutNorms(fieldName, LongField.longToString(longVal), PropertyType.LONG)); } /** * Adds the long value to the document as the named field. The long * value is converted to an indexable string value using the {@link LongField} * class. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. */ protected void addDecimalValue(Document doc, String fieldName, Object internalValue) { BigDecimal decVal = (BigDecimal) internalValue; doc.add(createFieldWithoutNorms(fieldName, DecimalField.decimalToString(decVal), PropertyType.DECIMAL)); } /** * Adds the reference value to the document as the named field. The value's * string representation is added as the reference data. Additionally the * reference data is stored in the index. As of Jackrabbit 2.0 this method * also adds the reference UUID as a {@link FieldNames#WEAK_REFS} field * to the index if it is a weak reference. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. * @param weak Flag indicating whether it's a WEAKREFERENCE (true) or a REFERENCE (flase) */ protected void addReferenceValue(Document doc, String fieldName, Object internalValue, boolean weak) { String uuid = internalValue.toString(); doc.add(createFieldWithoutNorms(fieldName, uuid, weak ? PropertyType.WEAKREFERENCE : PropertyType.REFERENCE)); doc.add(new Field(FieldNames.PROPERTIES, FieldNames.createNamedValue(fieldName, uuid), Field.Store.YES, Field.Index.NO, Field.TermVector.NO)); if (weak) { doc.add(new Field(FieldNames.WEAK_REFS, uuid, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); } } /** * Adds the path value to the document as the named field. The path * value is converted to an indexable string value using the name space * mappings with which this class has been created. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. */ protected void addPathValue(Document doc, String fieldName, Object internalValue) { Path path = (Path) internalValue; String pathString = path.toString(); try { pathString = resolver.getJCRPath(path); } catch (NamespaceException e) { // will never happen } doc.add(createFieldWithoutNorms(fieldName, pathString, PropertyType.PATH)); } /** * Adds the uri value to the document as the named field. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. */ protected void addURIValue(Document doc, String fieldName, Object internalValue) { URI uri = (URI) internalValue; doc.add(createFieldWithoutNorms(fieldName, uri.toString(), PropertyType.URI)); } /** * Adds the string value to the document both as the named field and for * full text indexing. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. * @deprecated Use {@link #addStringValue(Document, String, Object, boolean) * addStringValue(Document, String, Object, boolean)} instead. */ protected void addStringValue(Document doc, String fieldName, Object internalValue) { addStringValue(doc, fieldName, internalValue, true, true, DEFAULT_BOOST); } /** * Adds the string value to the document both as the named field and * optionally for full text indexing if <code>tokenized</code> is * <code>true</code>. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. * @param tokenized If <code>true</code> the string is also tokenized * and fulltext indexed. */ protected void addStringValue(Document doc, String fieldName, Object internalValue, boolean tokenized) { addStringValue(doc, fieldName, internalValue, tokenized, true, DEFAULT_BOOST); } /** * Adds the string value to the document both as the named field and * optionally for full text indexing if <code>tokenized</code> is * <code>true</code>. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the * document. * @param tokenized If <code>true</code> the string is also * tokenized and fulltext indexed. * @param includeInNodeIndex If <code>true</code> the string is also * tokenized and added to the node scope fulltext * index. * @param boost the boost value for this string field. * @deprecated use {@link #addStringValue(Document, String, Object, boolean, boolean, float, boolean)} instead. */ protected void addStringValue(Document doc, String fieldName, Object internalValue, boolean tokenized, boolean includeInNodeIndex, float boost) { addStringValue(doc, fieldName, internalValue, tokenized, includeInNodeIndex, boost, true); } /** * Adds the string value to the document both as the named field and * optionally for full text indexing if <code>tokenized</code> is * <code>true</code>. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the * document. * @param tokenized If <code>true</code> the string is also * tokenized and fulltext indexed. * @param includeInNodeIndex If <code>true</code> the string is also * tokenized and added to the node scope fulltext * index. * @param boost the boost value for this string field. * @param useInExcerpt If <code>true</code> the string may show up in * an excerpt. */ protected void addStringValue(Document doc, String fieldName, Object internalValue, boolean tokenized, boolean includeInNodeIndex, float boost, boolean useInExcerpt) { // simple String String stringValue = (String) internalValue; doc.add(createFieldWithoutNorms(fieldName, stringValue, PropertyType.STRING)); if (tokenized) { if (stringValue.length() == 0) { return; } // create fulltext index on property int idx = fieldName.indexOf(':'); fieldName = fieldName.substring(0, idx + 1) + FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1); Field f = new Field(fieldName, stringValue, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO); f.setBoost(boost); doc.add(f); if (includeInNodeIndex) { // also create fulltext index of this value boolean store = supportHighlighting && useInExcerpt; f = createFulltextField(stringValue, store, supportHighlighting); if (useInExcerpt) { doc.add(f); } else { doNotUseInExcerpt.add(f); } } } } /** * Adds the name value to the document as the named field. The name * value is converted to an indexable string treating the internal value * as a <code>Name</code> and mapping the name space using the name space * mappings with which this class has been created. * * @param doc The document to which to add the field * @param fieldName The name of the field to add * @param internalValue The value for the field to add to the document. */ protected void addNameValue(Document doc, String fieldName, Object internalValue) { try { Name qualiName = (Name) internalValue; String normValue = mappings.getPrefix(qualiName.getNamespaceURI()) + ":" + qualiName.getLocalName(); doc.add(createFieldWithoutNorms(fieldName, normValue, PropertyType.NAME)); } catch (NamespaceException e) { // will never happen } } /** * Creates a fulltext field for the string <code>value</code>. * * @param value the string value. * @return a lucene field. * @deprecated use {@link #createFulltextField(String, boolean, boolean)} instead. */ protected Field createFulltextField(String value) { return createFulltextField(value, supportHighlighting, supportHighlighting); } /** * Creates a fulltext field for the string <code>value</code>. * * @param value the string value. * @param store if the value of the field should be stored. * @param withOffsets if a term vector with offsets should be stored. * @return a lucene field. */ protected Field createFulltextField(String value, boolean store, boolean withOffsets) { Field.TermVector tv; if (withOffsets) { tv = Field.TermVector.WITH_OFFSETS; } else { tv = Field.TermVector.NO; } if (store) { // store field compressed if greater than 16k Field.Store stored; if (value.length() > 0x4000) { stored = Field.Store.COMPRESS; } else { stored = Field.Store.YES; } return new Field(FieldNames.FULLTEXT, value, stored, Field.Index.ANALYZED, tv); } else { return new Field(FieldNames.FULLTEXT, value, Field.Store.NO, Field.Index.ANALYZED, tv); } } /** * Creates a fulltext field for the reader <code>value</code>. * * @param value the binary value * @param metadata document metatadata * @return a lucene field. */ protected Fieldable createFulltextField( InternalValue value, Metadata metadata) { return new LazyTextExtractorField( parser, value, metadata, executor, supportHighlighting, getMaxExtractLength()); } /** * Returns <code>true</code> if the property with the given name should be * indexed. * * @param propertyName name of a property. * @return <code>true</code> if the property should be fulltext indexed; * <code>false</code> otherwise. */ protected boolean isIndexed(Name propertyName) { if (indexingConfig == null) { return true; } else { return indexingConfig.isIndexed(node, propertyName); } } /** * Returns <code>true</code> if the property with the given name should also * be added to the node scope index. * * @param propertyName the name of a property. * @return <code>true</code> if it should be added to the node scope index; * <code>false</code> otherwise. */ protected boolean isIncludedInNodeIndex(Name propertyName) { if (indexingConfig == null) { return true; } else { return indexingConfig.isIncludedInNodeScopeIndex(node, propertyName); } } /** * Returns <code>true</code> if the content of the property with the given * name should the used to create an excerpt. * * @param propertyName the name of a property. * @return <code>true</code> if it should be used to create an excerpt; * <code>false</code> otherwise. */ protected boolean useInExcerpt(Name propertyName) { if (indexingConfig == null) { return true; } else { return indexingConfig.useInExcerpt(node, propertyName); } } /** * Returns the boost value for the given property name. * * @param propertyName the name of a property. * @return the boost value for the given property name. */ protected float getPropertyBoost(Name propertyName) { if (indexingConfig == null) { return DEFAULT_BOOST; } else { return indexingConfig.getPropertyBoost(node, propertyName); } } /** * @return the boost value for this {@link #node} state. */ protected float getNodeBoost() { if (indexingConfig == null) { return DEFAULT_BOOST; } else { return indexingConfig.getNodeBoost(node); } } /** * Adds a {@link FieldNames#PROPERTY_LENGTHS} field to <code>document</code> * with a named length value. * * @param doc the lucene document. * @param propertyName the property name. * @param value the internal value. */ protected void addLength(Document doc, String propertyName, InternalValue value) { long length = Util.getLength(value); if (length != -1) { doc.add(new Field(FieldNames.PROPERTY_LENGTHS, FieldNames.createNamedLength(propertyName, length), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); } } /** * Depending on the index format version adds one or two fields to the * document for the node name. * * @param doc the lucene document. * @param namespaceURI the namespace URI of the node name. * @param localName the local name of the node. */ protected void addNodeName(Document doc, String namespaceURI, String localName) throws NamespaceException { String name = mappings.getPrefix(namespaceURI) + ":" + localName; doc.add(new Field(FieldNames.LABEL, name, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); // as of version 3, also index combination of namespace URI and local name if (indexFormatVersion.getVersion() >= IndexFormatVersion.V3.getVersion()) { doc.add(new Field(FieldNames.NAMESPACE_URI, namespaceURI, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field(FieldNames.LOCAL_NAME, localName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); } } /** * Adds a parent child relation to the given <code>doc</code>. * * @param doc the document. * @param parentId the id of the parent node. * @throws ItemStateException if the parent node cannot be read. * @throws RepositoryException if the parent node does not have a child node * entry for the current node. */ protected void addParentChildRelation(Document doc, NodeId parentId) throws ItemStateException, RepositoryException { doc.add(new Field( FieldNames.PARENT, parentId.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO)); NodeState parent = (NodeState) stateProvider.getItemState(parentId); ChildNodeEntry child = parent.getChildNodeEntry(node.getNodeId()); if (child == null) { // this can only happen when jackrabbit // is running in a cluster. throw new RepositoryException( "Missing child node entry for node with id: " + node.getNodeId()); } Name name = child.getName(); addNodeName(doc, name.getNamespaceURI(), name.getLocalName()); } }
如果我们把一个类当做一个黑箱,我们就要了解的逻辑是在我们输入什么信息的情况下,然后是我们能够得到什么信息
循着这种思路,首先要了解的是一个类的构造方法参数,其次再是其他功能方法参数,然后才是返回的类型了
如是,NodeIndexer类的构造方法如下
/** * Creates a new node indexer. * * @param node the node state to index. * @param stateProvider the persistent item state manager to retrieve properties. * @param mappings internal namespace mappings. * @param executor background task executor for text extraction * @param parser parser for binary properties */ public NodeIndexer( NodeState node, ItemStateManager stateProvider, NamespaceMappings mappings, Executor executor, Parser parser) { this.node = node; this.stateProvider = stateProvider; this.mappings = mappings; this.resolver = NamePathResolverImpl.create(mappings); this.executor = executor; this.parser = parser; }
由此可以看到,NodeIndexer类实现lucene的Document对象构建的信息来源主要是依赖 NodeState node参数的
其次是NodeIndexer类最重要的构建lucene的Document方法
/** * Creates a lucene Document. * * @return the lucene Document with the index layout. * @throws RepositoryException if an error occurs while reading property * values from the <code>ItemStateProvider</code>. */ public Document createDoc() throws RepositoryException { doNotUseInExcerpt.clear(); Document doc = new Document(); doc.setBoost(getNodeBoost()); // special fields // UUID doc.add(new IDField(node.getNodeId())); try { // parent UUID if (node.getParentId() == null) { // root node doc.add(new Field(FieldNames.PARENT, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); addNodeName(doc, "", ""); } else if (node.getSharedSet().isEmpty()) { addParentChildRelation(doc, node.getParentId()); } else { // shareable node for (NodeId id : node.getSharedSet()) { addParentChildRelation(doc, id); } // mark shareable nodes doc.add(new Field(FieldNames.SHAREABLE_NODE, "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); } } catch (NoSuchItemStateException e) { throwRepositoryException(e); } catch (ItemStateException e) { throwRepositoryException(e); } catch (NamespaceException e) { // will never happen, because this.mappings will dynamically add // unknown uri<->prefix mappings } Set<Name> props = node.getPropertyNames(); for (Name propName : props) { PropertyId id = new PropertyId(node.getNodeId(), propName); try { PropertyState propState = (PropertyState) stateProvider.getItemState(id); // add each property to the _PROPERTIES_SET for searching // beginning with V2 if (indexFormatVersion.getVersion() >= IndexFormatVersion.V2.getVersion()) { addPropertyName(doc, propState.getName()); } InternalValue[] values = propState.getValues(); for (InternalValue value : values) { addValue(doc, value, propState.getName()); } if (values.length > 1) { // real multi-valued addMVPName(doc, propState.getName()); } } catch (NoSuchItemStateException e) { throwRepositoryException(e); } catch (ItemStateException e) { throwRepositoryException(e); } } // now add fields that are not used in excerpt (must go at the end) for (Fieldable field : doNotUseInExcerpt) { doc.add(field); } return doc; }
这里面根据 构造方法初始化的成员变量NodeState node的属性集依次向Document添加不同类型的Field对象
---------------------------------------------------------------------------
本系列Apache Jackrabbit源码研究系本人原创
转载请注明出处 博客园 刺猬的温驯
本文链接 http://www.cnblogs.com/chenying99/archive/2013/04/03/3002823.html