1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.onehippo.forge.content.exim.core.util;
18
19 import java.util.ArrayList;
20 import java.util.List;
21 import java.util.StringTokenizer;
22
23 /**
24 * Note: Copied from the source of <code>org.apache.camel.util.AntPathMatcher</code>.
25 * <p>
26 * PathMatcher implementation for Ant-style path patterns. Examples are provided below.
27 * <p>
28 * Part of this mapping code has been kindly borrowed from <a href="http://ant.apache.org">Apache Ant</a>
29 * and <a href="http://springframework.org">Spring Framework</a>.
30 * <p>
31 * The mapping matches URLs using the following rules:<br>
32 * <ul>
33 * <li>? matches one character</li>
34 * <li>* matches zero or more characters</li>
35 * <li>** matches zero or more 'directories' in a path</li>
36 * </ul>
37 * <p>
38 * Some examples:<br>
39 * <ul>
40 * <li><code>com/t?st.jsp</code> - matches <code>com/test.jsp</code> but also
41 * <code>com/tast.jsp</code> or <code>com/txst.jsp</code>
42 * </li>
43 * <li><code>com/*.jsp</code> - matches all <code>.jsp</code> files in the
44 * <code>com</code> directory
45 * </li>
46 * <li><code>com/**/test.jsp</code> - matches all <code>test.jsp</code>
47 * files underneath the <code>com</code> path
48 * </li>
49 * <li><code>org/springframework/**/*.jsp</code> - matches all
50 * <code>.jsp</code> files underneath the <code>org/springframework</code> path
51 * </li>
52 * <li><code>org/**/servlet/bla.jsp</code> - matches
53 * <code>org/springframework/servlet/bla.jsp</code> but also
54 * <code>org/springframework/testing/servlet/bla.jsp</code> and
55 * <code>org/servlet/bla.jsp</code>
56 * </li>
57 * </ul>
58 */
59 public class AntPathMatcher {
60
61 /** Default path separator: "/" */
62 public static final String DEFAULT_PATH_SEPARATOR = "/";
63
64 private String pathSeparator = DEFAULT_PATH_SEPARATOR;
65
66 /**
67 * Set the path separator to use for pattern parsing. Default is "/", as in Ant.
68 * @param pathSeparator path separator
69 */
70 public void setPathSeparator(String pathSeparator) {
71 this.pathSeparator = pathSeparator != null ? pathSeparator : DEFAULT_PATH_SEPARATOR;
72 }
73
74 /**
75 * Check if {@code path} is an ANT style pattern string.
76 * @param path path pattern
77 * @return true if {@code path} is an ANT style pattern string
78 */
79 public boolean isPattern(String path) {
80 return path.indexOf('*') != -1 || path.indexOf('?') != -1;
81 }
82
83 /**
84 * Matches.
85 * @param pattern pattern
86 * @param path path
87 * @return true if matched
88 */
89 public boolean match(String pattern, String path) {
90 return match(pattern, path, true);
91 }
92
93 /**
94 * Match starts.
95 * @param pattern pattern
96 * @param path path
97 * @return true if match starts
98 */
99 public boolean matchStart(String pattern, String path) {
100 return matchStart(pattern, path, true);
101 }
102
103 /**
104 * Match.
105 * @param pattern pattern
106 * @param path path
107 * @param isCaseSensitive case sensitiveness
108 * @return true if matches
109 */
110 public boolean match(String pattern, String path, boolean isCaseSensitive) {
111 return doMatch(pattern, path, true, isCaseSensitive);
112 }
113
114 /**
115 * Match starts.
116 * @param pattern pattern
117 * @param path path
118 * @param isCaseSensitive case sensitiveness
119 * @return true if match starts
120 */
121 public boolean matchStart(String pattern, String path, boolean isCaseSensitive) {
122 return doMatch(pattern, path, false, isCaseSensitive);
123 }
124
125 /**
126 * Actually match the given <code>path</code> against the given
127 * <code>pattern</code>.
128 *
129 * @param pattern the pattern to match against
130 * @param path the path String to test
131 * @param fullMatch whether a full pattern match is required (else a pattern
132 * match as far as the given base path goes is sufficient)
133 * @param isCaseSensitive Whether or not matching should be performed
134 * case sensitively.
135 * @return <code>true</code> if the supplied <code>path</code> matched,
136 * <code>false</code> if it didn't
137 */
138 protected boolean doMatch(String pattern, String path, boolean fullMatch, boolean isCaseSensitive) {
139 if (path.startsWith(this.pathSeparator) != pattern.startsWith(this.pathSeparator)) {
140 return false;
141 }
142
143 String[] pattDirs = tokenizeToStringArray(pattern, this.pathSeparator);
144 String[] pathDirs = tokenizeToStringArray(path, this.pathSeparator);
145
146 int pattIdxStart = 0;
147 int pattIdxEnd = pattDirs.length - 1;
148 int pathIdxStart = 0;
149 int pathIdxEnd = pathDirs.length - 1;
150
151 // Match all elements up to the first **
152 while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
153 String patDir = pattDirs[pattIdxStart];
154 if ("**".equals(patDir)) {
155 break;
156 }
157 if (!matchStrings(patDir, pathDirs[pathIdxStart], isCaseSensitive)) {
158 return false;
159 }
160 pattIdxStart++;
161 pathIdxStart++;
162 }
163
164 if (pathIdxStart > pathIdxEnd) {
165 // Path is exhausted, only match if rest of pattern is * or **'s
166 if (pattIdxStart > pattIdxEnd) {
167 return pattern.endsWith(this.pathSeparator) ? path.endsWith(this.pathSeparator) : !path
168 .endsWith(this.pathSeparator);
169 }
170 if (!fullMatch) {
171 return true;
172 }
173 if (pattIdxStart == pattIdxEnd && pattDirs[pattIdxStart].equals("*")
174 && path.endsWith(this.pathSeparator)) {
175 return true;
176 }
177 for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
178 if (!pattDirs[i].equals("**")) {
179 return false;
180 }
181 }
182 return true;
183 } else if (pattIdxStart > pattIdxEnd) {
184 // String not exhausted, but pattern is. Failure.
185 return false;
186 } else if (!fullMatch && "**".equals(pattDirs[pattIdxStart])) {
187 // Path start definitely matches due to "**" part in pattern.
188 return true;
189 }
190
191 // up to last '**'
192 while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
193 String patDir = pattDirs[pattIdxEnd];
194 if (patDir.equals("**")) {
195 break;
196 }
197 if (!matchStrings(patDir, pathDirs[pathIdxEnd], isCaseSensitive)) {
198 return false;
199 }
200 pattIdxEnd--;
201 pathIdxEnd--;
202 }
203 if (pathIdxStart > pathIdxEnd) {
204 // String is exhausted
205 for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
206 if (!pattDirs[i].equals("**")) {
207 return false;
208 }
209 }
210 return true;
211 }
212
213 while (pattIdxStart != pattIdxEnd && pathIdxStart <= pathIdxEnd) {
214 int patIdxTmp = -1;
215 for (int i = pattIdxStart + 1; i <= pattIdxEnd; i++) {
216 if (pattDirs[i].equals("**")) {
217 patIdxTmp = i;
218 break;
219 }
220 }
221 if (patIdxTmp == pattIdxStart + 1) {
222 // '**/**' situation, so skip one
223 pattIdxStart++;
224 continue;
225 }
226 // Find the pattern between padIdxStart & padIdxTmp in str between
227 // strIdxStart & strIdxEnd
228 int patLength = patIdxTmp - pattIdxStart - 1;
229 int strLength = pathIdxEnd - pathIdxStart + 1;
230 int foundIdx = -1;
231
232 strLoop:
233 for (int i = 0; i <= strLength - patLength; i++) {
234 for (int j = 0; j < patLength; j++) {
235 String subPat = pattDirs[pattIdxStart + j + 1];
236 String subStr = pathDirs[pathIdxStart + i + j];
237 if (!matchStrings(subPat, subStr, isCaseSensitive)) {
238 continue strLoop;
239 }
240 }
241 foundIdx = pathIdxStart + i;
242 break;
243 }
244
245 if (foundIdx == -1) {
246 return false;
247 }
248
249 pattIdxStart = patIdxTmp;
250 pathIdxStart = foundIdx + patLength;
251 }
252
253 for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
254 if (!pattDirs[i].equals("**")) {
255 return false;
256 }
257 }
258
259 return true;
260 }
261
262 /**
263 * Tests whether or not a string matches against a pattern. The pattern may
264 * contain two special characters:<br>
265 * '*' means zero or more characters<br>
266 * '?' means one and only one character
267 *
268 * @param pattern pattern to match against. Must not be <code>null</code>.
269 * @param str string which must be matched against the pattern. Must not be
270 * <code>null</code>.
271 * @param caseSensitive Whether or not matching should be performed
272 * case sensitively.
273 * @return <code>true</code> if the string matches against the pattern, or
274 * <code>false</code> otherwise.
275 */
276 private boolean matchStrings(String pattern, String str, boolean caseSensitive) {
277 char[] patArr = pattern.toCharArray();
278 char[] strArr = str.toCharArray();
279 int patIdxStart = 0;
280 int patIdxEnd = patArr.length - 1;
281 int strIdxStart = 0;
282 int strIdxEnd = strArr.length - 1;
283 char ch;
284
285 boolean containsStar = false;
286 for (char c : patArr) {
287 if (c == '*') {
288 containsStar = true;
289 break;
290 }
291 }
292
293 if (!containsStar) {
294 // No '*'s, so we make a shortcut
295 if (patIdxEnd != strIdxEnd) {
296 return false; // Pattern and string do not have the same size
297 }
298 for (int i = 0; i <= patIdxEnd; i++) {
299 ch = patArr[i];
300 if (ch != '?') {
301 if (different(caseSensitive, ch, strArr[i])) {
302 return false;
303 // Character mismatch
304 }
305 }
306 }
307 return true; // String matches against pattern
308 }
309
310 if (patIdxEnd == 0) {
311 return true; // Pattern contains only '*', which matches anything
312 }
313
314 // Process characters before first star
315 while ((ch = patArr[patIdxStart]) != '*' && strIdxStart <= strIdxEnd) {
316 if (ch != '?') {
317 if (different(caseSensitive, ch, strArr[strIdxStart])) {
318 return false;
319 // Character mismatch
320 }
321 }
322 patIdxStart++;
323 strIdxStart++;
324 }
325 if (strIdxStart > strIdxEnd) {
326 // All characters in the string are used. Check if only '*'s are
327 // left in the pattern. If so, we succeeded. Otherwise failure.
328 for (int i = patIdxStart; i <= patIdxEnd; i++) {
329 if (patArr[i] != '*') {
330 return false;
331 }
332 }
333 return true;
334 }
335
336 // Process characters after last star
337 while ((ch = patArr[patIdxEnd]) != '*' && strIdxStart <= strIdxEnd) {
338 if (ch != '?') {
339 if (different(caseSensitive, ch, strArr[strIdxEnd])) {
340 return false;
341 // Character mismatch
342 }
343 }
344 patIdxEnd--;
345 strIdxEnd--;
346 }
347 if (strIdxStart > strIdxEnd) {
348 // All characters in the string are used. Check if only '*'s are
349 // left in the pattern. If so, we succeeded. Otherwise failure.
350 for (int i = patIdxStart; i <= patIdxEnd; i++) {
351 if (patArr[i] != '*') {
352 return false;
353 }
354 }
355 return true;
356 }
357
358 // process pattern between stars. padIdxStart and patIdxEnd point
359 // always to a '*'.
360 while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
361 int patIdxTmp = -1;
362 for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
363 if (patArr[i] == '*') {
364 patIdxTmp = i;
365 break;
366 }
367 }
368 if (patIdxTmp == patIdxStart + 1) {
369 // Two stars next to each other, skip the first one.
370 patIdxStart++;
371 continue;
372 }
373 // Find the pattern between padIdxStart & padIdxTmp in str between
374 // strIdxStart & strIdxEnd
375 int patLength = patIdxTmp - patIdxStart - 1;
376 int strLength = strIdxEnd - strIdxStart + 1;
377 int foundIdx = -1;
378 strLoop:
379 for (int i = 0; i <= strLength - patLength; i++) {
380 for (int j = 0; j < patLength; j++) {
381 ch = patArr[patIdxStart + j + 1];
382 if (ch != '?') {
383 if (different(caseSensitive, ch, strArr[strIdxStart + i + j])) {
384 continue strLoop;
385 }
386 }
387 }
388
389 foundIdx = strIdxStart + i;
390 break;
391 }
392
393 if (foundIdx == -1) {
394 return false;
395 }
396
397 patIdxStart = patIdxTmp;
398 strIdxStart = foundIdx + patLength;
399 }
400
401 // All characters in the string are used. Check if only '*'s are left
402 // in the pattern. If so, we succeeded. Otherwise failure.
403 for (int i = patIdxStart; i <= patIdxEnd; i++) {
404 if (patArr[i] != '*') {
405 return false;
406 }
407 }
408
409 return true;
410 }
411
412 /**
413 * Given a pattern and a full path, determine the pattern-mapped part.
414 * <p>
415 * For example:
416 * <ul>
417 * <li>'<code>/docs/cvs/commit.html</code>' and '
418 * <code>/docs/cvs/commit.html</code> -> ''</li>
419 * <li>'<code>/docs/*</code>' and '<code>/docs/cvs/commit</code> -> '
420 * <code>cvs/commit</code>'</li>
421 * <li>'<code>/docs/cvs/*.html</code>' and '
422 * <code>/docs/cvs/commit.html</code> -> '<code>commit.html</code>'</li>
423 * <li>'<code>/docs/**</code>' and '<code>/docs/cvs/commit</code> -> '
424 * <code>cvs/commit</code>'</li>
425 * <li>'<code>/docs/**\/*.html</code>' and '
426 * <code>/docs/cvs/commit.html</code> -> '<code>cvs/commit.html</code>'</li>
427 * <li>'<code>/*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
428 * <code>docs/cvs/commit.html</code>'</li>
429 * <li>'<code>*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
430 * <code>/docs/cvs/commit.html</code>'</li>
431 * <li>'<code>*</code>' and '<code>/docs/cvs/commit.html</code> -> '
432 * <code>/docs/cvs/commit.html</code>'</li>
433 * </ul>
434 * <p>
435 * Assumes that {@link #match} returns <code>true</code> for '
436 * <code>pattern</code>' and '<code>path</code>', but does
437 * <strong>not</strong> enforce this.
438 * @param pattern pattern
439 * @param path path
440 * @return extracted path within the pattern
441 */
442 public String extractPathWithinPattern(String pattern, String path) {
443 String[] patternParts = tokenizeToStringArray(pattern, this.pathSeparator);
444 String[] pathParts = tokenizeToStringArray(path, this.pathSeparator);
445
446 StringBuilder buffer = new StringBuilder();
447
448 // Add any path parts that have a wildcarded pattern part.
449 int puts = 0;
450 for (int i = 0; i < patternParts.length; i++) {
451 String patternPart = patternParts[i];
452 if ((patternPart.indexOf('*') > -1 || patternPart.indexOf('?') > -1) && pathParts.length >= i + 1) {
453 if (puts > 0 || (i == 0 && !pattern.startsWith(this.pathSeparator))) {
454 buffer.append(this.pathSeparator);
455 }
456 buffer.append(pathParts[i]);
457 puts++;
458 }
459 }
460
461 // Append any trailing path parts.
462 for (int i = patternParts.length; i < pathParts.length; i++) {
463 if (puts > 0 || i > 0) {
464 buffer.append(this.pathSeparator);
465 }
466 buffer.append(pathParts[i]);
467 }
468
469 return buffer.toString();
470 }
471
472 /**
473 * Tokenize the given String into a String array via a StringTokenizer.
474 * Trims tokens and omits empty tokens.
475 * <p>
476 * The given delimiters string is supposed to consist of any number of
477 * delimiter characters. Each of those characters can be used to separate
478 * tokens. A delimiter is always a single character; for multi-character
479 * delimiters, consider using <code>delimitedListToStringArray</code>
480 *
481 * @param str the String to tokenize
482 * @param delimiters the delimiter characters, assembled as String (each of
483 * those characters is individually considered as delimiter).
484 * @return an array of the tokens
485 * @see java.util.StringTokenizer
486 * @see java.lang.String#trim()
487 */
488 public static String[] tokenizeToStringArray(String str, String delimiters) {
489 if (str == null) {
490 return null;
491 }
492 StringTokenizer st = new StringTokenizer(str, delimiters);
493 List<String> tokens = new ArrayList<String>();
494 while (st.hasMoreTokens()) {
495 String token = st.nextToken();
496 token = token.trim();
497 if (token.length() > 0) {
498 tokens.add(token);
499 }
500 }
501 return tokens.toArray(new String[tokens.size()]);
502 }
503
504 private static boolean different(boolean caseSensitive, char ch, char other) {
505 return caseSensitive ? ch != other : Character.toUpperCase(ch) != Character.toUpperCase(other);
506 }
507 }