View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one
3    *  or more contributor license agreements.  See the NOTICE file
4    *  distributed with this work for additional information
5    *  regarding copyright ownership.  The ASF licenses this file
6    *  to you under the Apache License, Version 2.0 (the
7    *  "License"); you may not use this file except in compliance
8    *  with the License.  You may obtain a copy of the License at
9    *  
10   *    http://www.apache.org/licenses/LICENSE-2.0
11   *  
12   *  Unless required by applicable law or agreed to in writing,
13   *  software distributed under the License is distributed on an
14   *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   *  KIND, either express or implied.  See the License for the
16   *  specific language governing permissions and limitations
17   *  under the License. 
18   *  
19   */
20  package org.apache.directory.server.xdbm.search.impl;
21  
22  
23  import java.io.IOException;
24  import java.util.HashSet;
25  import java.util.List;
26  import java.util.Set;
27  
28  import org.apache.directory.api.ldap.model.constants.SchemaConstants;
29  import org.apache.directory.api.ldap.model.cursor.Cursor;
30  import org.apache.directory.api.ldap.model.exception.LdapException;
31  import org.apache.directory.api.ldap.model.exception.LdapOtherException;
32  import org.apache.directory.api.ldap.model.filter.AndNode;
33  import org.apache.directory.api.ldap.model.filter.ApproximateNode;
34  import org.apache.directory.api.ldap.model.filter.AssertionNode;
35  import org.apache.directory.api.ldap.model.filter.BranchNode;
36  import org.apache.directory.api.ldap.model.filter.EqualityNode;
37  import org.apache.directory.api.ldap.model.filter.ExprNode;
38  import org.apache.directory.api.ldap.model.filter.ExtensibleNode;
39  import org.apache.directory.api.ldap.model.filter.GreaterEqNode;
40  import org.apache.directory.api.ldap.model.filter.LeafNode;
41  import org.apache.directory.api.ldap.model.filter.LessEqNode;
42  import org.apache.directory.api.ldap.model.filter.NotNode;
43  import org.apache.directory.api.ldap.model.filter.OrNode;
44  import org.apache.directory.api.ldap.model.filter.PresenceNode;
45  import org.apache.directory.api.ldap.model.filter.ScopeNode;
46  import org.apache.directory.api.ldap.model.filter.SimpleNode;
47  import org.apache.directory.api.ldap.model.filter.SubstringNode;
48  import org.apache.directory.api.util.Strings;
49  import org.apache.directory.server.core.api.partition.Partition;
50  import org.apache.directory.server.core.api.partition.PartitionTxn;
51  import org.apache.directory.server.i18n.I18n;
52  import org.apache.directory.server.xdbm.Index;
53  import org.apache.directory.server.xdbm.IndexNotFoundException;
54  import org.apache.directory.server.xdbm.Store;
55  import org.apache.directory.server.xdbm.search.Optimizer;
56  
57  
58  /**
59   * Optimizer that annotates the filter using scan counts.
60   * 
61   * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
62   */
63  public class DefaultOptimizer implements Optimizer
64  {
65      /* Package protected*/ static final String CANDIDATES_ANNOTATION_KEY = "candidates";
66      
67      /* Package protected*/ static final String COUNT_ANNOTATION = "count"; 
68  
69      /** the database this optimizer operates on */
70      private final Store db;
71      private String contextEntryId;
72  
73  
74      /**
75       * Creates an optimizer on a database.
76       *
77       * @param db the database this optimizer works for.
78       */
79      public DefaultOptimizer( Store db )
80      {
81          this.db = db;
82      }
83  
84  
85      // This will suppress PMD.EmptyCatchBlock warnings in this method
86      @SuppressWarnings("PMD.EmptyCatchBlock")
87      private String getContextEntryId( PartitionTxn partitionTxn ) throws LdapException
88      {
89          if ( contextEntryId == null )
90          {
91              try
92              {
93                  this.contextEntryId = db.getEntryId( partitionTxn, ( ( Partition ) db ).getSuffixDn() );
94              }
95              catch ( Exception e )
96              {
97                  // might not have been created
98              }
99          }
100 
101         if ( contextEntryId == null )
102         {
103             return Partition.DEFAULT_ID;
104         }
105 
106         return contextEntryId;
107     }
108 
109 
110     /**
111      * Annotates the expression tree to determine optimal evaluation order based
112      * on the scan count for indices that exist for each expression node.  If an
113      * index on the attribute does not exist an IndexNotFoundException will be
114      * thrown.
115      *
116      * {@inheritDoc}
117      */
118     @Override
119     @SuppressWarnings("unchecked")
120     public Long annotate( PartitionTxn partitionTxn, ExprNode node ) throws LdapException
121     {
122         // Start off with the worst case unless scan count says otherwise.
123         Long count = Long.MAX_VALUE;
124 
125         /* --------------------------------------------------------------------
126          *                 H A N D L E   L E A F   N O D E S          
127          * --------------------------------------------------------------------
128          * 
129          * Each leaf node is based on an attribute and it represents a condition
130          * that needs to be statisfied.  We ask the index (if one exists) for 
131          * the attribute to give us a scan count of all the candidates that 
132          * would satisfy the attribute assertion represented by the leaf node.
133          * 
134          * This is conducted differently based on the type of the leaf node.
135          * Comments on each node type explain how each scan count is arrived at.
136          */
137 
138         if ( node instanceof ScopeNode )
139         {
140             count = getScopeScan( partitionTxn, ( ScopeNode ) node );
141         }
142         else if ( node instanceof AssertionNode )
143         {
144             /* 
145              * Leave it up to the assertion node to determine just how much it
146              * will cost us.  Anyway it defaults to a maximum scan count if a
147              * scan count is not specified by the implementation.
148              */
149         }
150         else if ( node.isLeaf() )
151         {
152             LeafNode leaf = ( LeafNode ) node;
153 
154             try
155             {  
156                 if ( node instanceof PresenceNode )
157                 {
158                     count = getPresenceScan( partitionTxn, ( PresenceNode ) leaf );
159                 }
160                 else if ( node instanceof EqualityNode )
161                 {
162                     count = getEqualityScan( partitionTxn, ( EqualityNode ) leaf );
163                 }
164                 else if ( node instanceof GreaterEqNode )
165                 {
166                     count = getGreaterLessScan( partitionTxn, ( GreaterEqNode ) leaf, SimpleNode.EVAL_GREATER );
167                 }
168                 else if ( node instanceof LessEqNode )
169                 {
170                     count = getGreaterLessScan( partitionTxn, ( SimpleNode ) leaf, SimpleNode.EVAL_LESSER );
171                 }
172                 else if ( node instanceof SubstringNode )
173                 {
174                     /** Cannot really say so we presume the total index count */
175                     count = getSubstringScan( partitionTxn, ( SubstringNode ) leaf );
176                 }
177                 else if ( node instanceof ExtensibleNode )
178                 {
179                     /** Cannot really say so we presume the total index count */
180                     count = getFullScan( partitionTxn, leaf );
181                 }
182                 else if ( node instanceof ApproximateNode )
183                 {
184                     /** Feature not implemented so we just use equality matching */
185                     count = getEqualityScan( partitionTxn, ( ApproximateNode ) leaf );
186                 }
187                 else
188                 {
189                     throw new IllegalArgumentException( I18n.err( I18n.ERR_711 ) );
190                 }
191             }
192             catch ( IndexNotFoundException | IOException e )
193             {
194                 throw new LdapOtherException( e.getMessage(), e );
195             }
196         }
197         // --------------------------------------------------------------------
198         //                 H A N D L E   B R A N C H   N O D E S       
199         // --------------------------------------------------------------------
200         else
201         {
202             if ( node instanceof AndNode )
203             {
204                 count = getConjunctionScan( partitionTxn, ( AndNode ) node );
205             }
206             else if ( node instanceof OrNode )
207             {
208                 count = getDisjunctionScan( partitionTxn, ( OrNode ) node );
209             }
210             else if ( node instanceof NotNode )
211             {
212                 annotate( partitionTxn, ( ( NotNode ) node ).getFirstChild() );
213 
214                 /*
215                  * A negation filter is always worst case since we will have
216                  * to retrieve all entries from the master table then test
217                  * each one against the negated child filter.  There is no way
218                  * to use the indices.
219                  */
220                 count = Long.MAX_VALUE;
221             }
222             else
223             {
224                 throw new IllegalArgumentException( I18n.err( I18n.ERR_712 ) );
225             }
226         }
227 
228         // Protect against overflow when counting.
229         if ( count < 0L )
230         {
231             count = Long.MAX_VALUE;
232         }
233 
234         node.set( COUNT_ANNOTATION, count );
235 
236         return count;
237     }
238 
239 
240     /**
241      * ANDs or Conjunctions take the count of the smallest child as their count.
242      * This is the best that a conjunction can do and should be used rather than
243      * the worst case. Notice that we annotate the child node with a recursive 
244      * call before accessing its count parameter making the chain recursion 
245      * depth first.
246      *
247      * @param node a AND (Conjunction) BranchNode
248      * @return the calculated scan count
249      * @throws Exception if there is an error
250      */
251     private long getConjunctionScan( PartitionTxn partitionTxn, BranchNode node ) throws LdapException
252     {
253         long count = Long.MAX_VALUE;
254         List<ExprNode> children = node.getChildren();
255 
256         for ( ExprNode child : children )
257         {
258             if ( ( count == 1 ) && ( child instanceof ScopeNode ) )
259             {
260                 // We can stop here
261                 break;
262             }
263 
264             annotate( partitionTxn, child );
265             count = Math.min( ( ( Long ) child.get( COUNT_ANNOTATION ) ), count );
266 
267             if ( count == 0 )
268             {
269                 // No need to continue
270                 break;
271             }
272         }
273 
274         return count;
275     }
276 
277 
278     /**
279      * Disjunctions (OR) are the union of candidates across all subexpressions 
280      * so we add all the counts of the child nodes. Notice that we annotate the 
281      * child node with a recursive call.
282      *
283      * @param node the OR branch node
284      * @return the scan count on the OR node
285      * @throws Exception if there is an error
286      */
287     private long getDisjunctionScan( PartitionTxn partitionTxn, BranchNode node ) throws LdapException
288     {
289         List<ExprNode> children = node.getChildren();
290         long total = 0L;
291 
292         for ( ExprNode child : children )
293         {
294             annotate( partitionTxn, child );
295             total += ( Long ) child.get( COUNT_ANNOTATION );
296 
297             if ( total == Long.MAX_VALUE )
298             {
299                 // We can stop here withoit evaluating the following filters
300                 break;
301             }
302         }
303 
304         return total;
305     }
306 
307 
308     /**
309      * Gets the worst case scan count for all entries that satisfy the equality
310      * assertion in the SimpleNode argument.  
311      *
312      * @param node the node to get a scan count for 
313      * @return the worst case
314      * @throws Exception if there is an error accessing an index
315      */
316     @SuppressWarnings("unchecked")
317     private <V> long getEqualityScan( PartitionTxn partitionTxn, SimpleNode<V> node ) throws LdapException, IndexNotFoundException, IOException
318     {
319         if ( db.hasIndexOn( node.getAttributeType() ) )
320         {
321             Index<V, String> idx = ( Index<V, String> ) db.getIndex( node.getAttributeType() );
322 
323             String normalizedKey;
324             
325             if ( node.getValue().isSchemaAware() )
326             {
327                 normalizedKey = node.getValue().getNormalized();
328             }
329             else
330             {
331                 normalizedKey = node.getAttributeType().getEquality().getNormalizer().normalize( node.getValue().getString() );
332             }
333             
334             Cursor<String> result = idx.forwardValueCursor( partitionTxn, ( V ) normalizedKey );
335             Set<String> values = new HashSet<>();
336             int nbFound = 0;
337 
338             for ( String value : result )
339             {
340                 values.add( value );
341                 nbFound++;
342 
343                 // Arbitrary stop gathering the candidates if we have more than 100
344                 if ( nbFound == 100 )
345                 {
346                     break;
347                 }
348             }
349 
350             result.close();
351 
352             if ( nbFound < 100 )
353             {
354                 // Store the found candidates in the node
355                 node.set( CANDIDATES_ANNOTATION_KEY, values );
356 
357                 return values.size();
358             }
359             else
360             {
361                 // Reset the candidates annotation
362                 node.set( CANDIDATES_ANNOTATION_KEY, null );
363 
364                 return idx.count( partitionTxn, ( V ) node.getValue().getNormalized() );
365             }
366         }
367 
368         // count for non-indexed attribute is unknown so we presume da worst
369         return Long.MAX_VALUE;
370     }
371 
372 
373     /**
374      * Gets a scan count of the nodes that satisfy the greater or less than test
375      * specified by the node.
376      *
377      * @param node the greater or less than node to get a count for 
378      * @param isGreaterThan if true test is for >=, otherwise <=
379      * @return the scan count of all nodes satisfying the Ava
380      * @throws Exception if there is an error accessing an index
381      */
382     @SuppressWarnings("unchecked")
383     private <V> long getGreaterLessScan( PartitionTxn partitionTxn, SimpleNode<V> node, boolean isGreaterThan ) throws LdapException, IndexNotFoundException
384     {
385         if ( db.hasIndexOn( node.getAttributeType() ) )
386         {
387             Index<V, String> idx = ( Index<V, String> ) db.getIndex( node.getAttributeType() );
388 
389             if ( isGreaterThan )
390             {
391                 return idx.greaterThanCount( partitionTxn, ( V ) node.getValue().getString() );
392             }
393             else
394             {
395                 return idx.lessThanCount( partitionTxn, ( V ) node.getValue().getString() );
396             }
397         }
398 
399         // count for non-indexed attribute is unknown so we presume da worst
400         return Long.MAX_VALUE;
401     }
402 
403 
404     /**
405      * Get a scan count based on a Substring node : we will count the entries that are greater
406      * than ABC where the filter is (attr=ABC*). Any other filter won't be evaluated (for instance,
407      * a filter like (attr=*ABC) will resolve to a full scan atm - we could have created a reverted
408      * index for such a case -, and filters like (attr=*ABC*) also esolve to a full scan).
409      * 
410      * @param node The substring node
411      * @return The number of candidates
412      * @throws Exception If there is an error accessing an index
413      */
414     private long getSubstringScan( PartitionTxn partitionTxn, SubstringNode node ) throws LdapException, IndexNotFoundException
415     {
416         if ( db.hasIndexOn( node.getAttributeType() ) )
417         {
418             Index<String, String> idx = ( Index<String, String> ) db.getIndex( node.getAttributeType() );
419 
420             String initial = node.getInitial();
421 
422             if ( Strings.isEmpty( initial ) )
423             {
424                 // Not a (attr=ABC*) filter : full index scan
425                 return idx.count( partitionTxn );
426             }
427             else
428             {
429                 return idx.greaterThanCount( partitionTxn, initial );
430             }
431         }
432         else
433         {
434             // count for non-indexed attribute is unknown so we presume da worst
435             return Long.MAX_VALUE;
436         }
437     }
438 
439 
440     /**
441      * Gets the total number of entries within the database index if one is 
442      * available otherwise the count of all the entries within the database is
443      * returned.
444      *
445      * @param node the leaf node to get a full scan count for 
446      * @return the worst case full scan count
447      * @throws Exception if there is an error access database indices
448      */
449     private long getFullScan( PartitionTxn partitionTxn, LeafNode node ) throws LdapException, IndexNotFoundException
450     {
451         if ( db.hasIndexOn( node.getAttributeType() ) )
452         {
453             Index<?, ?> idx = db.getIndex( node.getAttributeType() );
454             return idx.count( partitionTxn );
455         }
456 
457         return Long.MAX_VALUE;
458     }
459 
460 
461     /**
462      * Gets the number of entries that would be returned by a presence node
463      * assertion.  Leverages the presence system index for scan counts.
464      *
465      * @param node the presence node
466      * @return the number of entries matched for the presence of an attribute
467      * @throws Exception if errors result
468      */
469     private long getPresenceScan( PartitionTxn partitionTxn, PresenceNode node ) throws LdapException
470     {
471         if ( db.hasUserIndexOn( node.getAttributeType() )
472              || node.getAttributeType().getOid().equals( SchemaConstants.ADMINISTRATIVE_ROLE_AT_OID ) )
473         {
474             Index<String, String> presenceIndex = db.getPresenceIndex();
475 
476             return presenceIndex.count( partitionTxn, node.getAttributeType().getOid() );
477         }
478         else if ( db.hasSystemIndexOn( node.getAttributeType() )
479             || ( node.getAttributeType().getOid() == SchemaConstants.ENTRY_UUID_AT_OID ) )
480         {
481             // the system indices (objectClass, entryUUID and entryCSN) are maintained for
482             // each entry, so we could just return the database count
483             return db.count( partitionTxn );
484         }
485 
486         return Long.MAX_VALUE;
487     }
488 
489 
490     /**
491      * Gets the scan count for the scope node attached to this filter.
492      *
493      * @param node the ScopeNode
494      * @return the scan count for scope
495      * @throws Exception if any errors result
496      */
497     private long getScopeScan( PartitionTxn partitionTxn, ScopeNode node ) throws LdapException
498     {
499         String id = node.getBaseId();
500 
501         switch ( node.getScope() )
502         {
503             case OBJECT:
504                 return 1L;
505 
506             case ONELEVEL:
507                 return db.getChildCount( partitionTxn, id );
508 
509             case SUBTREE:
510                 if ( id == getContextEntryId( partitionTxn ) )
511                 {
512                     return db.count( partitionTxn );
513                 }
514                 else
515                 {
516                     return db.getRdnIndex().reverseLookup( partitionTxn, id ).getNbDescendants() + 1L;
517                 }
518 
519             default:
520                 throw new IllegalArgumentException( I18n.err( I18n.ERR_713 ) );
521         }
522     }
523 }