View Javadoc
1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.onehippo.forge.content.exim.core.util;
18  
19  import java.util.ArrayList;
20  import java.util.List;
21  import java.util.StringTokenizer;
22  
23  /**
24   * Note: Copied from the source of <code>org.apache.camel.util.AntPathMatcher</code>.
25   * <p>
26   * PathMatcher implementation for Ant-style path patterns. Examples are provided below.
27   * <p>
28   * Part of this mapping code has been kindly borrowed from <a href="http://ant.apache.org">Apache Ant</a>
29   * and <a href="http://springframework.org">Spring Framework</a>.
30   * <p>
31   * The mapping matches URLs using the following rules:<br>
32   * <ul>
33   *   <li>? matches one character</li>
34   *   <li>* matches zero or more characters</li>
35   *   <li>** matches zero or more 'directories' in a path</li>
36   * </ul>
37   * <p>
38   * Some examples:<br>
39   * <ul>
40   *   <li><code>com/t?st.jsp</code> - matches <code>com/test.jsp</code> but also
41   *       <code>com/tast.jsp</code> or <code>com/txst.jsp</code>
42   *   </li>
43   *   <li><code>com/*.jsp</code> - matches all <code>.jsp</code> files in the
44   *       <code>com</code> directory
45   *   </li>
46   *   <li><code>com/&#42;&#42;/test.jsp</code> - matches all <code>test.jsp</code>
47   *       files underneath the <code>com</code> path
48   *   </li>
49   *   <li><code>org/springframework/&#42;&#42;/*.jsp</code> - matches all
50   *       <code>.jsp</code> files underneath the <code>org/springframework</code> path
51   *   </li>
52   *   <li><code>org/&#42;&#42;/servlet/bla.jsp</code> - matches
53   *       <code>org/springframework/servlet/bla.jsp</code> but also
54   *       <code>org/springframework/testing/servlet/bla.jsp</code> and
55   *       <code>org/servlet/bla.jsp</code>
56   *   </li>
57   * </ul>
58   */
59  public class AntPathMatcher {
60  
61      /** Default path separator: "/" */
62      public static final String DEFAULT_PATH_SEPARATOR = "/";
63  
64      private String pathSeparator = DEFAULT_PATH_SEPARATOR;
65  
66      /**
67       * Set the path separator to use for pattern parsing. Default is "/", as in Ant.
68       * @param pathSeparator path separator
69       */
70      public void setPathSeparator(String pathSeparator) {
71          this.pathSeparator = pathSeparator != null ? pathSeparator : DEFAULT_PATH_SEPARATOR;
72      }
73  
74      /**
75       * Check if {@code path} is an ANT style pattern string.
76       * @param path path pattern
77       * @return true if {@code path} is an ANT style pattern string
78       */
79      public boolean isPattern(String path) {
80          return path.indexOf('*') != -1 || path.indexOf('?') != -1;
81      }
82  
83      /**
84       * Matches.
85       * @param pattern pattern
86       * @param path path
87       * @return true if matched
88       */
89      public boolean match(String pattern, String path) {
90          return match(pattern, path, true);
91      }
92  
93      /**
94       * Match starts.
95       * @param pattern pattern
96       * @param path path
97       * @return true if match starts
98       */
99      public boolean matchStart(String pattern, String path) {
100         return matchStart(pattern, path, true);
101     }
102 
103     /**
104      * Match.
105      * @param pattern pattern
106      * @param path path
107      * @param isCaseSensitive case sensitiveness
108      * @return true if matches
109      */
110     public boolean match(String pattern, String path, boolean isCaseSensitive) {
111         return doMatch(pattern, path, true, isCaseSensitive);
112     }
113 
114     /**
115      * Match starts.
116      * @param pattern pattern
117      * @param path path
118      * @param isCaseSensitive case sensitiveness
119      * @return true if match starts
120      */
121     public boolean matchStart(String pattern, String path, boolean isCaseSensitive) {
122         return doMatch(pattern, path, false, isCaseSensitive);
123     }
124 
125     /**
126      * Actually match the given <code>path</code> against the given
127      * <code>pattern</code>.
128      * 
129      * @param pattern the pattern to match against
130      * @param path the path String to test
131      * @param fullMatch whether a full pattern match is required (else a pattern
132      *            match as far as the given base path goes is sufficient)
133      * @param isCaseSensitive Whether or not matching should be performed
134      *                        case sensitively.
135      * @return <code>true</code> if the supplied <code>path</code> matched,
136      *         <code>false</code> if it didn't
137      */
138     protected boolean doMatch(String pattern, String path, boolean fullMatch, boolean isCaseSensitive) {
139         if (path.startsWith(this.pathSeparator) != pattern.startsWith(this.pathSeparator)) {
140             return false;
141         }
142 
143         String[] pattDirs = tokenizeToStringArray(pattern, this.pathSeparator);
144         String[] pathDirs = tokenizeToStringArray(path, this.pathSeparator);
145 
146         int pattIdxStart = 0;
147         int pattIdxEnd = pattDirs.length - 1;
148         int pathIdxStart = 0;
149         int pathIdxEnd = pathDirs.length - 1;
150 
151         // Match all elements up to the first **
152         while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
153             String patDir = pattDirs[pattIdxStart];
154             if ("**".equals(patDir)) {
155                 break;
156             }
157             if (!matchStrings(patDir, pathDirs[pathIdxStart], isCaseSensitive)) {
158                 return false;
159             }
160             pattIdxStart++;
161             pathIdxStart++;
162         }
163 
164         if (pathIdxStart > pathIdxEnd) {
165             // Path is exhausted, only match if rest of pattern is * or **'s
166             if (pattIdxStart > pattIdxEnd) {
167                 return pattern.endsWith(this.pathSeparator) ? path.endsWith(this.pathSeparator) : !path
168                     .endsWith(this.pathSeparator);
169             }
170             if (!fullMatch) {
171                 return true;
172             }
173             if (pattIdxStart == pattIdxEnd && pattDirs[pattIdxStart].equals("*")
174                 && path.endsWith(this.pathSeparator)) {
175                 return true;
176             }
177             for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
178                 if (!pattDirs[i].equals("**")) {
179                     return false;
180                 }
181             }
182             return true;
183         } else if (pattIdxStart > pattIdxEnd) {
184             // String not exhausted, but pattern is. Failure.
185             return false;
186         } else if (!fullMatch && "**".equals(pattDirs[pattIdxStart])) {
187             // Path start definitely matches due to "**" part in pattern.
188             return true;
189         }
190 
191         // up to last '**'
192         while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
193             String patDir = pattDirs[pattIdxEnd];
194             if (patDir.equals("**")) {
195                 break;
196             }
197             if (!matchStrings(patDir, pathDirs[pathIdxEnd], isCaseSensitive)) {
198                 return false;
199             }
200             pattIdxEnd--;
201             pathIdxEnd--;
202         }
203         if (pathIdxStart > pathIdxEnd) {
204             // String is exhausted
205             for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
206                 if (!pattDirs[i].equals("**")) {
207                     return false;
208                 }
209             }
210             return true;
211         }
212 
213         while (pattIdxStart != pattIdxEnd && pathIdxStart <= pathIdxEnd) {
214             int patIdxTmp = -1;
215             for (int i = pattIdxStart + 1; i <= pattIdxEnd; i++) {
216                 if (pattDirs[i].equals("**")) {
217                     patIdxTmp = i;
218                     break;
219                 }
220             }
221             if (patIdxTmp == pattIdxStart + 1) {
222                 // '**/**' situation, so skip one
223                 pattIdxStart++;
224                 continue;
225             }
226             // Find the pattern between padIdxStart & padIdxTmp in str between
227             // strIdxStart & strIdxEnd
228             int patLength = patIdxTmp - pattIdxStart - 1;
229             int strLength = pathIdxEnd - pathIdxStart + 1;
230             int foundIdx = -1;
231 
232         strLoop:
233             for (int i = 0; i <= strLength - patLength; i++) {
234                 for (int j = 0; j < patLength; j++) {
235                     String subPat = pattDirs[pattIdxStart + j + 1];
236                     String subStr = pathDirs[pathIdxStart + i + j];
237                     if (!matchStrings(subPat, subStr, isCaseSensitive)) {
238                         continue strLoop;
239                     }
240                 }
241                 foundIdx = pathIdxStart + i;
242                 break;
243             }
244 
245             if (foundIdx == -1) {
246                 return false;
247             }
248 
249             pattIdxStart = patIdxTmp;
250             pathIdxStart = foundIdx + patLength;
251         }
252 
253         for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
254             if (!pattDirs[i].equals("**")) {
255                 return false;
256             }
257         }
258 
259         return true;
260     }
261 
262     /**
263      * Tests whether or not a string matches against a pattern. The pattern may
264      * contain two special characters:<br>
265      * '*' means zero or more characters<br>
266      * '?' means one and only one character
267      * 
268      * @param pattern pattern to match against. Must not be <code>null</code>.
269      * @param str string which must be matched against the pattern. Must not be
270      *            <code>null</code>.
271      * @param caseSensitive Whether or not matching should be performed
272      *                      case sensitively.
273      * @return <code>true</code> if the string matches against the pattern, or
274      *         <code>false</code> otherwise.
275      */
276     private boolean matchStrings(String pattern, String str, boolean caseSensitive) {
277         char[] patArr = pattern.toCharArray();
278         char[] strArr = str.toCharArray();
279         int patIdxStart = 0;
280         int patIdxEnd = patArr.length - 1;
281         int strIdxStart = 0;
282         int strIdxEnd = strArr.length - 1;
283         char ch;
284 
285         boolean containsStar = false;
286         for (char c : patArr) {
287             if (c == '*') {
288                 containsStar = true;
289                 break;
290             }
291         }
292 
293         if (!containsStar) {
294             // No '*'s, so we make a shortcut
295             if (patIdxEnd != strIdxEnd) {
296                 return false; // Pattern and string do not have the same size
297             }
298             for (int i = 0; i <= patIdxEnd; i++) {
299                 ch = patArr[i];
300                 if (ch != '?') {
301                     if (different(caseSensitive, ch, strArr[i])) {
302                         return false;
303                         // Character mismatch
304                     }
305                 }
306             }
307             return true; // String matches against pattern
308         }
309 
310         if (patIdxEnd == 0) {
311             return true; // Pattern contains only '*', which matches anything
312         }
313 
314         // Process characters before first star
315         while ((ch = patArr[patIdxStart]) != '*' && strIdxStart <= strIdxEnd) {
316             if (ch != '?') {
317                 if (different(caseSensitive, ch, strArr[strIdxStart])) {
318                     return false;
319                     // Character mismatch
320                 }
321             }
322             patIdxStart++;
323             strIdxStart++;
324         }
325         if (strIdxStart > strIdxEnd) {
326             // All characters in the string are used. Check if only '*'s are
327             // left in the pattern. If so, we succeeded. Otherwise failure.
328             for (int i = patIdxStart; i <= patIdxEnd; i++) {
329                 if (patArr[i] != '*') {
330                     return false;
331                 }
332             }
333             return true;
334         }
335 
336         // Process characters after last star
337         while ((ch = patArr[patIdxEnd]) != '*' && strIdxStart <= strIdxEnd) {
338             if (ch != '?') {
339                 if (different(caseSensitive, ch, strArr[strIdxEnd])) {
340                     return false;
341                     // Character mismatch
342                 }
343             }
344             patIdxEnd--;
345             strIdxEnd--;
346         }
347         if (strIdxStart > strIdxEnd) {
348             // All characters in the string are used. Check if only '*'s are
349             // left in the pattern. If so, we succeeded. Otherwise failure.
350             for (int i = patIdxStart; i <= patIdxEnd; i++) {
351                 if (patArr[i] != '*') {
352                     return false;
353                 }
354             }
355             return true;
356         }
357 
358         // process pattern between stars. padIdxStart and patIdxEnd point
359         // always to a '*'.
360         while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
361             int patIdxTmp = -1;
362             for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
363                 if (patArr[i] == '*') {
364                     patIdxTmp = i;
365                     break;
366                 }
367             }
368             if (patIdxTmp == patIdxStart + 1) {
369                 // Two stars next to each other, skip the first one.
370                 patIdxStart++;
371                 continue;
372             }
373             // Find the pattern between padIdxStart & padIdxTmp in str between
374             // strIdxStart & strIdxEnd
375             int patLength = patIdxTmp - patIdxStart - 1;
376             int strLength = strIdxEnd - strIdxStart + 1;
377             int foundIdx = -1;
378         strLoop: 
379             for (int i = 0; i <= strLength - patLength; i++) {
380                 for (int j = 0; j < patLength; j++) {
381                     ch = patArr[patIdxStart + j + 1];
382                     if (ch != '?') {
383                         if (different(caseSensitive, ch, strArr[strIdxStart + i + j])) {
384                             continue strLoop;
385                         }
386                     }
387                 }
388 
389                 foundIdx = strIdxStart + i;
390                 break;
391             }
392 
393             if (foundIdx == -1) {
394                 return false;
395             }
396 
397             patIdxStart = patIdxTmp;
398             strIdxStart = foundIdx + patLength;
399         }
400 
401         // All characters in the string are used. Check if only '*'s are left
402         // in the pattern. If so, we succeeded. Otherwise failure.
403         for (int i = patIdxStart; i <= patIdxEnd; i++) {
404             if (patArr[i] != '*') {
405                 return false;
406             }
407         }
408 
409         return true;
410     }
411 
412     /**
413      * Given a pattern and a full path, determine the pattern-mapped part.
414      * <p>
415      * For example:
416      * <ul>
417      * <li>'<code>/docs/cvs/commit.html</code>' and '
418      * <code>/docs/cvs/commit.html</code> -&gt; ''</li>
419      * <li>'<code>/docs/*</code>' and '<code>/docs/cvs/commit</code> -&gt; '
420      * <code>cvs/commit</code>'</li>
421      * <li>'<code>/docs/cvs/*.html</code>' and '
422      * <code>/docs/cvs/commit.html</code> -&gt; '<code>commit.html</code>'</li>
423      * <li>'<code>/docs/**</code>' and '<code>/docs/cvs/commit</code> -&gt; '
424      * <code>cvs/commit</code>'</li>
425      * <li>'<code>/docs/**\/*.html</code>' and '
426      * <code>/docs/cvs/commit.html</code> -&gt; '<code>cvs/commit.html</code>'</li>
427      * <li>'<code>/*.html</code>' and '<code>/docs/cvs/commit.html</code> -&gt; '
428      * <code>docs/cvs/commit.html</code>'</li>
429      * <li>'<code>*.html</code>' and '<code>/docs/cvs/commit.html</code> -&gt; '
430      * <code>/docs/cvs/commit.html</code>'</li>
431      * <li>'<code>*</code>' and '<code>/docs/cvs/commit.html</code> -&gt; '
432      * <code>/docs/cvs/commit.html</code>'</li>
433      * </ul>
434      * <p>
435      * Assumes that {@link #match} returns <code>true</code> for '
436      * <code>pattern</code>' and '<code>path</code>', but does
437      * <strong>not</strong> enforce this.
438      * @param pattern pattern
439      * @param path path
440      * @return extracted path within the pattern
441      */
442     public String extractPathWithinPattern(String pattern, String path) {
443         String[] patternParts = tokenizeToStringArray(pattern, this.pathSeparator);
444         String[] pathParts = tokenizeToStringArray(path, this.pathSeparator);
445 
446         StringBuilder buffer = new StringBuilder();
447 
448         // Add any path parts that have a wildcarded pattern part.
449         int puts = 0;
450         for (int i = 0; i < patternParts.length; i++) {
451             String patternPart = patternParts[i];
452             if ((patternPart.indexOf('*') > -1 || patternPart.indexOf('?') > -1) && pathParts.length >= i + 1) {
453                 if (puts > 0 || (i == 0 && !pattern.startsWith(this.pathSeparator))) {
454                     buffer.append(this.pathSeparator);
455                 }
456                 buffer.append(pathParts[i]);
457                 puts++;
458             }
459         }
460 
461         // Append any trailing path parts.
462         for (int i = patternParts.length; i < pathParts.length; i++) {
463             if (puts > 0 || i > 0) {
464                 buffer.append(this.pathSeparator);
465             }
466             buffer.append(pathParts[i]);
467         }
468 
469         return buffer.toString();
470     }
471 
472     /**
473      * Tokenize the given String into a String array via a StringTokenizer.
474      * Trims tokens and omits empty tokens.
475      * <p>
476      * The given delimiters string is supposed to consist of any number of
477      * delimiter characters. Each of those characters can be used to separate
478      * tokens. A delimiter is always a single character; for multi-character
479      * delimiters, consider using <code>delimitedListToStringArray</code>
480      * 
481      * @param str the String to tokenize
482      * @param delimiters the delimiter characters, assembled as String (each of
483      *            those characters is individually considered as delimiter).
484      * @return an array of the tokens
485      * @see java.util.StringTokenizer
486      * @see java.lang.String#trim()
487      */
488     public static String[] tokenizeToStringArray(String str, String delimiters) {
489         if (str == null) {
490             return null;
491         }
492         StringTokenizer st = new StringTokenizer(str, delimiters);
493         List<String> tokens = new ArrayList<String>();
494         while (st.hasMoreTokens()) {
495             String token = st.nextToken();
496             token = token.trim();
497             if (token.length() > 0) {
498                 tokens.add(token);
499             }
500         }
501         return tokens.toArray(new String[tokens.size()]);
502     }
503 
504     private static boolean different(boolean caseSensitive, char ch, char other) {
505         return caseSensitive ? ch != other : Character.toUpperCase(ch) != Character.toUpperCase(other);
506     }
507 }