Monitoring JavaMelody on /SapphireIMS

1 /*

2  * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.

3  * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.

4  *

5  *

6  *

7  *

8  *

9  *

10  *

11  *

12  *

13  *

14  *

15  *

16  *

17  *

18  *

19  *

20  *

21  *

22  *

23  *

24  */

25 

26 package java.util.regex;

27 

28 import java.util.Objects;

29 

30 /**

31  * An engine that performs match operations on a {@linkplain java.lang.CharSequence

32  * character sequence} by interpreting a {@link Pattern}.

33  *

34  * <p> A matcher is created from a pattern by invoking the pattern's {@link

35  * Pattern#matcher matcher} method.  Once created, a matcher can be used to

36  * perform three different kinds of match operations:

37  *

38  * <ul>

39  *

40  *   <li><p> The {@link #matches matches} method attempts to match the entire

41  *   input sequence against the pattern.  </p></li>

42  *

43  *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the

44  *   input sequence, starting at the beginning, against the pattern.  </p></li>

45  *

46  *   <li><p> The {@link #find find} method scans the input sequence looking for

47  *   the next subsequence that matches the pattern.  </p></li>

48  *

49  * </ul>

50  *

51  * <p> Each of these methods returns a boolean indicating success or failure.

52  * More information about a successful match can be obtained by querying the

53  * state of the matcher.

54  *

55  * <p> A matcher finds matches in a subset of its input called the

56  * <i>region</i>. By default, the region contains all of the matcher's input.

57  * The region can be modified via the {@link #region region} method and queried

58  * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}

59  * methods. The way that the region boundaries interact with some pattern

60  * constructs can be changed. See {@link #useAnchoringBounds

61  * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}

62  * for more details.

63  *

64  * <p> This class also defines methods for replacing matched subsequences with

65  * new strings whose contents can, if desired, be computed from the match

66  * result.  The {@link #appendReplacement appendReplacement} and {@link

67  * #appendTail appendTail} methods can be used in tandem in order to collect

68  * the result into an existing string buffer, or the more convenient {@link

69  * #replaceAll replaceAll} method can be used to create a string in which every

70  * matching subsequence in the input sequence is replaced.

71  *

72  * <p> The explicit state of a matcher includes the start and end indices of

73  * the most recent successful match.  It also includes the start and end

74  * indices of the input subsequence captured by each <a

75  * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total

76  * count of such subsequences.  As a convenience, methods are also provided for

77  * returning these captured subsequences in string form.

78  *

79  * <p> The explicit state of a matcher is initially undefined; attempting to

80  * query any part of it before a successful match will cause an {@link

81  * IllegalStateException} to be thrown.  The explicit state of a matcher is

82  * recomputed by every match operation.

83  *

84  * <p> The implicit state of a matcher includes the input character sequence as

85  * well as the <i>append position</i>, which is initially zero and is updated

86  * by the {@link #appendReplacement appendReplacement} method.

87  *

88  * <p> A matcher may be reset explicitly by invoking its {@link #reset()}

89  * method or, if a new input sequence is desired, its {@link

90  * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a

91  * matcher discards its explicit state information and sets the append position

92  * to zero.

93  *

94  * <p> Instances of this class are not safe for use by multiple concurrent

95  * threads. </p>

96  *

97  *

98  * @author      Mike McCloskey

99  * @author      Mark Reinhold

100  * @author      JSR-51 Expert Group

101  * @since       1.4

102  * @spec        JSR-51

103  */

104 

105 public final class Matcher implements MatchResult {

106 

107     /**

108      * The Pattern object that created this Matcher.

109      */

110     Pattern parentPattern;

111 

112     /**

113      * The storage used by groups. They may contain invalid values if

114      * a group was skipped during the matching.

115      */

116     int[] groups;

117 

118     /**

119      * The range within the sequence that is to be matched. Anchors

120      * will match at these "hard" boundaries. Changing the region

121      * changes these values.

122      */

123     int from, to;

124 

125     /**

126      * Lookbehind uses this value to ensure that the subexpression

127      * match ends at the point where the lookbehind was encountered.

128      */

129     int lookbehindTo;

130 

131     /**

132      * The original string being matched.

133      */

134     CharSequence text;

135 

136     /**

137      * Matcher state used by the last node. NOANCHOR is used when a

138      * match does not have to consume all of the input. ENDANCHOR is

139      * the mode used for matching all the input.

140      */

141     static final int ENDANCHOR = 1;

142     static final int NOANCHOR = 0;

143     int acceptMode = NOANCHOR;

144 

145     /**

146      * The range of string that last matched the pattern. If the last

147      * match failed then first is -1; last initially holds 0 then it

148      * holds the index of the end of the last match (which is where the

149      * next search starts).

150      */

151     int first = -1, last = 0;

152 

153     /**

154      * The end index of what matched in the last match operation.

155      */

156     int oldLast = -1;

157 

158     /**

159      * The index of the last position appended in a substitution.

160      */

161     int lastAppendPosition = 0;

162 

163     /**

164      * Storage used by nodes to tell what repetition they are on in

165      * a pattern, and where groups begin. The nodes themselves are stateless,

166      * so they rely on this field to hold state during a match.

167      */

168     int[] locals;

169 

170     /**

171      * Boolean indicating whether or not more input could change

172      * the results of the last match.

173      *

174      * If hitEnd is true, and a match was found, then more input

175      * might cause a different match to be found.

176      * If hitEnd is true and a match was not found, then more

177      * input could cause a match to be found.

178      * If hitEnd is false and a match was found, then more input

179      * will not change the match.

180      * If hitEnd is false and a match was not found, then more

181      * input will not cause a match to be found.

182      */

183     boolean hitEnd;

184 

185     /**

186      * Boolean indicating whether or not more input could change

187      * a positive match into a negative one.

188      *

189      * If requireEnd is true, and a match was found, then more

190      * input could cause the match to be lost.

191      * If requireEnd is false and a match was found, then more

192      * input might change the match but the match won't be lost.

193      * If a match was not found, then requireEnd has no meaning.

194      */

195     boolean requireEnd;

196 

197     /**

198      * If transparentBounds is true then the boundaries of this

199      * matcher's region are transparent to lookahead, lookbehind,

200      * and boundary matching constructs that try to see beyond them.

201      */

202     boolean transparentBounds = false;

203 

204     /**

205      * If anchoringBounds is true then the boundaries of this

206      * matcher's region match anchors such as ^ and $.

207      */

208     boolean anchoringBounds = true;

209 

210     /**

211      * No public default constructor: this package-private one allocates no match state.

212      */

213     Matcher() {
        // Intentionally empty: unlike the two-argument constructor, nothing is allocated or reset here.

214     }

215 

216     /**

217      * All matchers have the state used by Pattern during a match.

218      */

219     Matcher(Pattern parent, CharSequence text) {

220         this.parentPattern = parent;

221         this.text = text;

222 

223         // Allocate state storage

224         int parentGroupCount = Math.max(parent.capturingGroupCount, 10);

225         groups = new int[parentGroupCount * 2];

226         locals = new int[parent.localCount];

227 

228         // Put fields into initial states

229         reset();

230     }

231 

232     /**

233      * Returns the pattern that is interpreted by this matcher.

234      *

235      * @return  The pattern for which this matcher was created

236      */

237     public Pattern pattern() {

238         return parentPattern;

239     }

240 

241     /**

242      * Returns the match state of this matcher as a {@link MatchResult}.

243      * The result is unaffected by subsequent operations performed upon this

244      * matcher.

245      *

246      * @return  a <code>MatchResult</code> with the state of this matcher

247      * @since 1.5

248      */

249     public MatchResult toMatchResult() {

250         Matcher result = new Matcher(this.parentPattern, text.toString());

251         result.first = this.first;

252         result.last = this.last;

253         result.groups = this.groups.clone();

254         return result;

255     }

256 

257     /**

258       * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to

259       * find matches with.

260       *

261       * <p> This method causes this matcher to lose information

262       * about the groups of the last match that occurred. The

263       * matcher's position in the input is maintained and its

264       * last append position is unaffected.</p>

265       *

266       * @param  newPattern

267       *         The new pattern used by this matcher

268       * @return  This matcher

269       * @throws  IllegalArgumentException

270       *          If newPattern is <tt>null</tt>

271       * @since 1.5

272       */

273     public Matcher usePattern(Pattern newPattern) {

274         if (newPattern == null)

275             throw new IllegalArgumentException("Pattern cannot be null");

276         parentPattern = newPattern;

277 

278         // Reallocate state storage

279         int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);

280         groups = new int[parentGroupCount * 2];

281         locals = new int[newPattern.localCount];

282         for (int i = 0; i < groups.length; i++)

283             groups[i] = -1;

284         for (int i = 0; i < locals.length; i++)

285             locals[i] = -1;

286         return this;

287     }

288 

289     /**

290      * Resets this matcher.

291      *

292      * <p> Resetting a matcher discards all of its explicit state information

293      * and sets its append position to zero. The matcher's region is set to the

294      * default region, which is its entire character sequence. The anchoring

295      * and transparency of this matcher's region boundaries are unaffected.

296      *

297      * @return  This matcher

298      */

299     public Matcher reset() {

300         first = -1;

301         last = 0;

302         oldLast = -1;

303         for(int i=0; i<groups.length; i++)

304             groups[i] = -1;

305         for(int i=0; i<locals.length; i++)

306             locals[i] = -1;

307         lastAppendPosition = 0;

308         from = 0;

309         to = getTextLength();

310         return this;

311     }

312 

313     /**

314      * Resets this matcher with a new input sequence.

315      *

316      * <p> Resetting a matcher discards all of its explicit state information

317      * and sets its append position to zero.  The matcher's region is set to

318      * the default region, which is its entire character sequence.  The

319      * anchoring and transparency of this matcher's region boundaries are

320      * unaffected.

321      *

322      * @param  input

323      *         The new input character sequence

324      *

325      * @return  This matcher

326      */

327     public Matcher reset(CharSequence input) {

328         text = input;

329         return reset();

330     }

331 

332     /**

333      * Returns the start index of the previous match.

334      *

335      * @return  The index of the first character matched

336      *

337      * @throws  IllegalStateException

338      *          If no match has yet been attempted,

339      *          or if the previous match operation failed

340      */

341     public int start() {

342         if (first < 0)

343             throw new IllegalStateException("No match available");

344         return first;

345     }

346 

347     /**

348      * Returns the start index of the subsequence captured by the given group

349      * during the previous match operation.

350      *

351      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left

352      * to right, starting at one.  Group zero denotes the entire pattern, so

353      * the expression <i>m.</i><tt>start(0)</tt> is equivalent to

354      * <i>m.</i><tt>start()</tt>.  </p>

355      *

356      * @param  group

357      *         The index of a capturing group in this matcher's pattern

358      *

359      * @return  The index of the first character captured by the group,

360      *          or <tt>-1</tt> if the match was successful but the group

361      *          itself did not match anything

362      *

363      * @throws  IllegalStateException

364      *          If no match has yet been attempted,

365      *          or if the previous match operation failed

366      *

367      * @throws  IndexOutOfBoundsException

368      *          If there is no capturing group in the pattern

369      *          with the given index

370      */

371     public int start(int group) {

372         if (first < 0)

373             throw new IllegalStateException("No match available");

374         if (group < 0 || group > groupCount())

375             throw new IndexOutOfBoundsException("No group " + group);

376         return groups[group * 2];

377     }

378 

379     /**

380      * Returns the start index of the subsequence captured by the given

381      * <a href="Pattern.html#groupname">named-capturing group</a> during the

382      * previous match operation.

383      *

384      * @param  name

385      *         The name of a named-capturing group in this matcher's pattern

386      *

387      * @return  The index of the first character captured by the group,

388      *          or {@code -1} if the match was successful but the group

389      *          itself did not match anything

390      *

391      * @throws  IllegalStateException

392      *          If no match has yet been attempted,

393      *          or if the previous match operation failed

394      *

395      * @throws  IllegalArgumentException

396      *          If there is no capturing group in the pattern

397      *          with the given name

398      * @since 1.8

399      */

400     public int start(String name) {

401         return groups[getMatchedGroupIndex(name) * 2];

402     }

403 

404     /**

405      * Returns the offset after the last character matched.

406      *

407      * @return  The offset after the last character matched

408      *

409      * @throws  IllegalStateException

410      *          If no match has yet been attempted,

411      *          or if the previous match operation failed

412      */

413     public int end() {

414         if (first < 0)

415             throw new IllegalStateException("No match available");

416         return last;

417     }

418 

419     /**

420      * Returns the offset after the last character of the subsequence

421      * captured by the given group during the previous match operation.

422      *

423      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left

424      * to right, starting at one.  Group zero denotes the entire pattern, so

425      * the expression <i>m.</i><tt>end(0)</tt> is equivalent to

426      * <i>m.</i><tt>end()</tt>.  </p>

427      *

428      * @param  group

429      *         The index of a capturing group in this matcher's pattern

430      *

431      * @return  The offset after the last character captured by the group,

432      *          or <tt>-1</tt> if the match was successful

433      *          but the group itself did not match anything

434      *

435      * @throws  IllegalStateException

436      *          If no match has yet been attempted,

437      *          or if the previous match operation failed

438      *

439      * @throws  IndexOutOfBoundsException

440      *          If there is no capturing group in the pattern

441      *          with the given index

442      */

443     public int end(int group) {

444         if (first < 0)

445             throw new IllegalStateException("No match available");

446         if (group < 0 || group > groupCount())

447             throw new IndexOutOfBoundsException("No group " + group);

448         return groups[group * 2 + 1];

449     }

450 

451     /**

452      * Returns the offset after the last character of the subsequence

453      * captured by the given <a href="Pattern.html#groupname">named-capturing

454      * group</a> during the previous match operation.

455      *

456      * @param  name

457      *         The name of a named-capturing group in this matcher's pattern

458      *

459      * @return  The offset after the last character captured by the group,

460      *          or {@code -1} if the match was successful

461      *          but the group itself did not match anything

462      *

463      * @throws  IllegalStateException

464      *          If no match has yet been attempted,

465      *          or if the previous match operation failed

466      *

467      * @throws  IllegalArgumentException

468      *          If there is no capturing group in the pattern

469      *          with the given name

470      * @since 1.8

471      */

472     public int end(String name) {

473         return groups[getMatchedGroupIndex(name) * 2 + 1];

474     }

475 

476     /**

477      * Returns the input subsequence matched by the previous match.

478      *

479      * <p> For a matcher <i>m</i> with input sequence <i>s</i>,

480      * the expressions <i>m.</i><tt>group()</tt> and

481      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt>&nbsp;<i>m.</i><tt>end())</tt>

482      * are equivalent.  </p>

483      *

484      * <p> Note that some patterns, for example <tt>a*</tt>, match the empty

485      * string.  This method will return the empty string when the pattern

486      * successfully matches the empty string in the input.  </p>

487      *

488      * @return The (possibly empty) subsequence matched by the previous match,

489      *         in string form

490      *

491      * @throws  IllegalStateException

492      *          If no match has yet been attempted,

493      *          or if the previous match operation failed

494      */

495     public String group() {

496         return group(0);

497     }

498 

499     /**

500      * Returns the input subsequence captured by the given group during the

501      * previous match operation.

502      *

503      * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index

504      * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and

505      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt>&nbsp;<i>m.</i><tt>end(</tt><i>g</i><tt>))</tt>

506      * are equivalent.  </p>

507      *

508      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left

509      * to right, starting at one.  Group zero denotes the entire pattern, so

510      * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>.

511      * </p>

512      *

513      * <p> If the match was successful but the group specified failed to match

514      * any part of the input sequence, then <tt>null</tt> is returned. Note

515      * that some groups, for example <tt>(a*)</tt>, match the empty string.

516      * This method will return the empty string when such a group successfully

517      * matches the empty string in the input.  </p>

518      *

519      * @param  group

520      *         The index of a capturing group in this matcher's pattern

521      *

522      * @return  The (possibly empty) subsequence captured by the group

523      *          during the previous match, or <tt>null</tt> if the group

524      *          failed to match part of the input

525      *

526      * @throws  IllegalStateException

527      *          If no match has yet been attempted,

528      *          or if the previous match operation failed

529      *

530      * @throws  IndexOutOfBoundsException

531      *          If there is no capturing group in the pattern

532      *          with the given index

533      */

534     public String group(int group) {

535         if (first < 0)

536             throw new IllegalStateException("No match found");

537         if (group < 0 || group > groupCount())

538             throw new IndexOutOfBoundsException("No group " + group);

539         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))

540             return null;

541         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();

542     }

543 

544     /**

545      * Returns the input subsequence captured by the given

546      * <a href="Pattern.html#groupname">named-capturing group</a> during the previous

547      * match operation.

548      *

549      * <p> If the match was successful but the group specified failed to match

550      * any part of the input sequence, then <tt>null</tt> is returned. Note

551      * that some groups, for example <tt>(a*)</tt>, match the empty string.

552      * This method will return the empty string when such a group successfully

553      * matches the empty string in the input.  </p>

554      *

555      * @param  name

556      *         The name of a named-capturing group in this matcher's pattern

557      *

558      * @return  The (possibly empty) subsequence captured by the named group

559      *          during the previous match, or <tt>null</tt> if the group

560      *          failed to match part of the input

561      *

562      * @throws  IllegalStateException

563      *          If no match has yet been attempted,

564      *          or if the previous match operation failed

565      *

566      * @throws  IllegalArgumentException

567      *          If there is no capturing group in the pattern

568      *          with the given name

569      * @since 1.7

570      */

571     public String group(String name) {

572         int group = getMatchedGroupIndex(name);

573         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))

574             return null;

575         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();

576     }

577 

578     /**

579      * Returns the number of capturing groups in this matcher's pattern.

580      *

581      * <p> Group zero denotes the entire pattern by convention. It is not

582      * included in this count.

583      *

584      * <p> Any non-negative integer smaller than or equal to the value

585      * returned by this method is guaranteed to be a valid group index for

586      * this matcher.  </p>

587      *

588      * @return The number of capturing groups in this matcher's pattern

589      */

590     public int groupCount() {

591         return parentPattern.capturingGroupCount - 1;

592     }

593 

594     /**

595      * Attempts to match the entire region against the pattern.

596      *

597      * <p> If the match succeeds then more information can be obtained via the

598      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>

599      *

600      * @return  <tt>true</tt> if, and only if, the entire region sequence

601      *          matches this matcher's pattern

602      */

603     public boolean matches() {

604         return match(from, ENDANCHOR);

605     }

606 

607     /**

608      * Attempts to find the next subsequence of the input sequence that matches

609      * the pattern.

610      *

611      * <p> This method starts at the beginning of this matcher's region, or, if

612      * a previous invocation of the method was successful and the matcher has

613      * not since been reset, at the first character not matched by the previous

614      * match.

615      *

616      * <p> If the match succeeds then more information can be obtained via the

617      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>

618      *

619      * @return  <tt>true</tt> if, and only if, a subsequence of the input

620      *          sequence matches this matcher's pattern

621      */

622     public boolean find() {

623         int nextSearchIndex = last;

624         if (nextSearchIndex == first)

625             nextSearchIndex++;

626 

627         // If next search starts before region, start it at region

628         if (nextSearchIndex < from)

629             nextSearchIndex = from;

630 

631         // If next search starts beyond region then it fails

632         if (nextSearchIndex > to) {

633             for (int i = 0; i < groups.length; i++)

634                 groups[i] = -1;

635             return false;

636         }

637         return search(nextSearchIndex);

638     }

639 

640     /**

641      * Resets this matcher and then attempts to find the next subsequence of

642      * the input sequence that matches the pattern, starting at the specified

643      * index.

644      *

645      * <p> If the match succeeds then more information can be obtained via the

646      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent

647      * invocations of the {@link #find()} method will start at the first

648      * character not matched by this match.  </p>

649      *

650      * @param start the index to start searching for a match

651      * @throws  IndexOutOfBoundsException

652      *          If start is less than zero or if start is greater than the

653      *          length of the input sequence.

654      *

655      * @return  <tt>true</tt> if, and only if, a subsequence of the input

656      *          sequence starting at the given index matches this matcher's

657      *          pattern

658      */

659     public boolean find(int start) {

660         int limit = getTextLength();

661         if ((start < 0) || (start > limit))

662             throw new IndexOutOfBoundsException("Illegal start index");

663         reset();

664         return search(start);

665     }

666 

667     /**

668      * Attempts to match the input sequence, starting at the beginning of the

669      * region, against the pattern.

670      *

671      * <p> Like the {@link #matches matches} method, this method always starts

672      * at the beginning of the region; unlike that method, it does not

673      * require that the entire region be matched.

674      *

675      * <p> If the match succeeds then more information can be obtained via the

676      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>

677      *

678      * @return  <tt>true</tt> if, and only if, a prefix of the input

679      *          sequence matches this matcher's pattern

680      */

681     public boolean lookingAt() {

682         return match(from, NOANCHOR);

683     }

684 

685     /**

686      * Returns a literal replacement <code>String</code> for the specified

687      * <code>String</code>.

688      *

689      * This method produces a <code>String</code> that will work

690      * as a literal replacement <code>s</code> in the

691      * <code>appendReplacement</code> method of the {@link Matcher} class.

692      * The <code>String</code> produced will match the sequence of characters

693      * in <code>s</code> treated as a literal sequence. Backslashes ('\') and

694      * dollar signs ('$') will be given no special meaning.

695      *

696      * @param  s The string to be literalized

697      * @return  A literal string replacement

698      * @since 1.5

699      */

700     public static String quoteReplacement(String s) {

701         if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))

702             return s;

703         StringBuilder sb = new StringBuilder();

704         for (int i=0; i<s.length(); i++) {

705             char c = s.charAt(i);

706             if (c == '\\' || c == '$') {

707                 sb.append('\\');

708             }

709             sb.append(c);

710         }

711         return sb.toString();

712     }

713 

714     /**

715      * Implements a non-terminal append-and-replace step.

716      *

717      * <p> This method performs the following actions: </p>

718      *

719      * <ol>

720      *

721      *   <li><p> It reads characters from the input sequence, starting at the

722      *   append position, and appends them to the given string buffer.  It

723      *   stops after reading the last character preceding the previous match,

724      *   that is, the character at index {@link

725      *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>

726      *

727      *   <li><p> It appends the given replacement string to the string buffer.

728      *   </p></li>

729      *

730      *   <li><p> It sets the append position of this matcher to the index of

731      *   the last character matched, plus one, that is, to {@link #end()}.

732      *   </p></li>

733      *

734      * </ol>

735      *

736      * <p> The replacement string may contain references to subsequences

737      * captured during the previous match: Each occurrence of

738      * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>

739      * will be replaced by the result of evaluating the corresponding

740      * {@link #group(String) group(name)} or {@link #group(int) group(g)}

741      * respectively. For  <tt>$</tt><i>g</i>,

742      * the first number after the <tt>$</tt> is always treated as part of

743      * the group reference. Subsequent numbers are incorporated into g if

744      * they would form a legal group reference. Only the numerals '0'

745      * through '9' are considered as potential components of the group

746      * reference. If the second group matched the string <tt>"foo"</tt>, for

747      * example, then passing the replacement string <tt>"$2bar"</tt> would

748      * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar

749      * sign (<tt>$</tt>) may be included as a literal in the replacement

750      * string by preceding it with a backslash (<tt>\$</tt>).

751      *

752      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in

753      * the replacement string may cause the results to be different than if it

754      * were being treated as a literal replacement string. Dollar signs may be

755      * treated as references to captured subsequences as described above, and

756      * backslashes are used to escape literal characters in the replacement

757      * string.

758      *

759      * <p> This method is intended to be used in a loop together with the

760      * {@link #appendTail appendTail} and {@link #find find} methods.  The

761      * following code, for example, writes <tt>one dog two dogs in the

762      * yard</tt> to the standard-output stream: </p>

763      *

764      * <blockquote><pre>

765      * Pattern p = Pattern.compile("cat");

766      * Matcher m = p.matcher("one cat two cats in the yard");

767      * StringBuffer sb = new StringBuffer();

768      * while (m.find()) {

769      *     m.appendReplacement(sb, "dog");

770      * }

771      * m.appendTail(sb);

772      * System.out.println(sb.toString());</pre></blockquote>

773      *

774      * @param  sb

775      *         The target string buffer

776      *

777      * @param  replacement

778      *         The replacement string

779      *

780      * @return  This matcher

781      *

782      * @throws  IllegalStateException

783      *          If no match has yet been attempted,

784      *          or if the previous match operation failed

785      *

786      * @throws  IllegalArgumentException

787      *          If the replacement string refers to a named-capturing

788      *          group that does not exist in the pattern

789      *

790      * @throws  IndexOutOfBoundsException

791      *          If the replacement string refers to a capturing group

792      *          that does not exist in the pattern

793      */

794     public Matcher appendReplacement(StringBuffer sb, String replacement) {

795 

796         // If no match, return error

797         if (first < 0)

798             throw new IllegalStateException("No match available");

799 

800         // Process substitution string to replace group references with groups

801         int cursor = 0;

802         StringBuilder result = new StringBuilder();

803 

804         while (cursor < replacement.length()) {

805             char nextChar = replacement.charAt(cursor);

806             if (nextChar == '\\') {

807                 cursor++;

808                 if (cursor == replacement.length())

809                     throw new IllegalArgumentException(

810                         "character to be escaped is missing");

811                 nextChar = replacement.charAt(cursor);

812                 result.append(nextChar);

813                 cursor++;

814             } else if (nextChar == '$') {

815                 // Skip past $

816                 cursor++;

817                 // Throw IAE if this "$" is the last character in replacement

818                 if (cursor == replacement.length())

819                    throw new IllegalArgumentException(

820                         "Illegal group reference: group index is missing");

821                 nextChar = replacement.charAt(cursor);

822                 int refNum = -1;

823                 if (nextChar == '{') {

824                     cursor++;

825                     StringBuilder gsb = new StringBuilder();

826                     while (cursor < replacement.length()) {

827                         nextChar = replacement.charAt(cursor);

828                         if (ASCII.isLower(nextChar) ||

829                             ASCII.isUpper(nextChar) ||

830                             ASCII.isDigit(nextChar)) {

831                             gsb.append(nextChar);

832                             cursor++;

833                         } else {

834                             break;

835                         }

836                     }

837                     if (gsb.length() == 0)

838                         throw new IllegalArgumentException(

839                             "named capturing group has 0 length name");

840                     if (nextChar != '}')

841                         throw new IllegalArgumentException(

842                             "named capturing group is missing trailing '}'");

843                     String gname = gsb.toString();

844                     if (ASCII.isDigit(gname.charAt(0)))

845                         throw new IllegalArgumentException(

846                             "capturing group name {" + gname +

847                             "} starts with digit character");

848                     if (!parentPattern.namedGroups().containsKey(gname))

849                         throw new IllegalArgumentException(

850                             "No group with name {" + gname + "}");

851                     refNum = parentPattern.namedGroups().get(gname);

852                     cursor++;

853                 } else {

854                     // The first number is always a group

855                     refNum = (int)nextChar - '0';

856                     if ((refNum < 0)||(refNum > 9))

857                         throw new IllegalArgumentException(

858                             "Illegal group reference");

859                     cursor++;

860                     // Capture the largest legal group string

861                     boolean done = false;

862                     while (!done) {

863                         if (cursor >= replacement.length()) {

864                             break;

865                         }

866                         int nextDigit = replacement.charAt(cursor) - '0';

867                         if ((nextDigit < 0)||(nextDigit > 9)) { // not a number

868                             break;

869                         }

870                         int newRefNum = (refNum * 10) + nextDigit;

871                         if (groupCount() < newRefNum) {

872                             done = true;

873                         } else {

874                             refNum = newRefNum;

875                             cursor++;

876                         }

877                     }

878                 }

879                 // Append group

880                 if (start(refNum) != -1 && end(refNum) != -1)

881                     result.append(text, start(refNum), end(refNum));

882             } else {

883                 result.append(nextChar);

884                 cursor++;

885             }

886         }

887         // Append the intervening text

888         sb.append(text, lastAppendPosition, first);

889         // Append the match substitution

890         sb.append(result);

891 

892         lastAppendPosition = last;

893         return this;

894     }

895 

896     /**

897      * Implements a terminal append-and-replace step.

898      *

899      * <p> This method reads characters from the input sequence, starting at

900      * the append position, and appends them to the given string buffer.  It is

901      * intended to be invoked after one or more invocations of the {@link

902      * #appendReplacement appendReplacement} method in order to copy the

903      * remainder of the input sequence.  </p>

904      *

905      * @param  sb

906      *         The target string buffer

907      *

908      * @return  The target string buffer

909      */

910     public StringBuffer appendTail(StringBuffer sb) {

911         sb.append(text, lastAppendPosition, getTextLength());

912         return sb;

913     }

914 

915     /**

916      * Replaces every subsequence of the input sequence that matches the

917      * pattern with the given replacement string.

918      *

919      * <p> This method first resets this matcher.  It then scans the input

920      * sequence looking for matches of the pattern.  Characters that are not

921      * part of any match are appended directly to the result string; each match

922      * is replaced in the result by the replacement string.  The replacement

923      * string may contain references to captured subsequences as in the {@link

924      * #appendReplacement appendReplacement} method.

925      *

926      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in

927      * the replacement string may cause the results to be different than if it

928      * were being treated as a literal replacement string. Dollar signs may be

929      * treated as references to captured subsequences as described above, and

930      * backslashes are used to escape literal characters in the replacement

931      * string.

932      *

933      * <p> Given the regular expression <tt>a*b</tt>, the input

934      * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string

935      * <tt>"-"</tt>, an invocation of this method on a matcher for that

936      * expression would yield the string <tt>"-foo-foo-foo-"</tt>.

937      *

938      * <p> Invoking this method changes this matcher's state.  If the matcher

939      * is to be used in further matching operations then it should first be

940      * reset.  </p>

941      *

942      * @param  replacement

943      *         The replacement string

944      *

945      * @return  The string constructed by replacing each matching subsequence

946      *          by the replacement string, substituting captured subsequences

947      *          as needed

948      */

949     public String replaceAll(String replacement) {

950         reset();

951         boolean result = find();

952         if (result) {

953             StringBuffer sb = new StringBuffer();

954             do {

955                 appendReplacement(sb, replacement);

956                 result = find();

957             } while (result);

958             appendTail(sb);

959             return sb.toString();

960         }

961         return text.toString();

962     }

963 

964     /**

965      * Replaces the first subsequence of the input sequence that matches the

966      * pattern with the given replacement string.

967      *

968      * <p> This method first resets this matcher.  It then scans the input

969      * sequence looking for a match of the pattern.  Characters that are not

970      * part of the match are appended directly to the result string; the match

971      * is replaced in the result by the replacement string.  The replacement

972      * string may contain references to captured subsequences as in the {@link

973      * #appendReplacement appendReplacement} method.

974      *

975      * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in

976      * the replacement string may cause the results to be different than if it

977      * were being treated as a literal replacement string. Dollar signs may be

978      * treated as references to captured subsequences as described above, and

979      * backslashes are used to escape literal characters in the replacement

980      * string.

981      *

982      * <p> Given the regular expression <tt>dog</tt>, the input

983      * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string

984      * <tt>"cat"</tt>, an invocation of this method on a matcher for that

985      * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>.  </p>

986      *

987      * <p> Invoking this method changes this matcher's state.  If the matcher

988      * is to be used in further matching operations then it should first be

989      * reset.  </p>

990      *

991      * @param  replacement

992      *         The replacement string

993      * @return  The string constructed by replacing the first matching

994      *          subsequence by the replacement string, substituting captured

995      *          subsequences as needed

996      */

997     public String replaceFirst(String replacement) {

998         if (replacement == null)

999             throw new NullPointerException("replacement");

1000         reset();

1001         if (!find())

1002             return text.toString();

1003         StringBuffer sb = new StringBuffer();

1004         appendReplacement(sb, replacement);

1005         appendTail(sb);

1006         return sb.toString();

1007     }

1008 

1009     /**

1010      * Sets the limits of this matcher's region. The region is the part of the

1011      * input sequence that will be searched to find a match. Invoking this

1012      * method resets the matcher, and then sets the region to start at the

1013      * index specified by the <code>start</code> parameter and end at the

1014      * index specified by the <code>end</code> parameter.

1015      *

1016      * <p>Depending on the transparency and anchoring being used (see

1017      * {@link #useTransparentBounds useTransparentBounds} and

1018      * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such

1019      * as anchors may behave differently at or around the boundaries of the

1020      * region.

1021      *

1022      * @param  start

1023      *         The index to start searching at (inclusive)

1024      * @param  end

1025      *         The index to end searching at (exclusive)

1026      * @throws  IndexOutOfBoundsException

1027      *          If start or end is less than zero, if

1028      *          start is greater than the length of the input sequence, if

1029      *          end is greater than the length of the input sequence, or if

1030      *          start is greater than end.

1031      * @return  this matcher

1032      * @since 1.5

1033      */

1034     public Matcher region(int start, int end) {

1035         if ((start < 0) || (start > getTextLength()))

1036             throw new IndexOutOfBoundsException("start");

1037         if ((end < 0) || (end > getTextLength()))

1038             throw new IndexOutOfBoundsException("end");

1039         if (start > end)

1040             throw new IndexOutOfBoundsException("start > end");

1041         reset();

1042         from = start;

1043         to = end;

1044         return this;

1045     }

1046 

1047     /**

1048      * Reports the start index of this matcher's region. The

1049      * searches this matcher conducts are limited to finding matches

1050      * within {@link #regionStart regionStart} (inclusive) and

1051      * {@link #regionEnd regionEnd} (exclusive).

1052      *

1053      * @return  The starting point of this matcher's region

1054      * @since 1.5

1055      */

1056     public int regionStart() {

1057         return from;

1058     }

1059 

1060     /**

1061      * Reports the end index (exclusive) of this matcher's region.

1062      * The searches this matcher conducts are limited to finding matches

1063      * within {@link #regionStart regionStart} (inclusive) and

1064      * {@link #regionEnd regionEnd} (exclusive).

1065      *

1066      * @return  the ending point of this matcher's region

1067      * @since 1.5

1068      */

1069     public int regionEnd() {

1070         return to;

1071     }

1072 

1073     /**

1074      * Queries the transparency of region bounds for this matcher.

1075      *

1076      * <p> This method returns <tt>true</tt> if this matcher uses

1077      * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i>

1078      * bounds.

1079      *

1080      * <p> See {@link #useTransparentBounds useTransparentBounds} for a

1081      * description of transparent and opaque bounds.

1082      *

1083      * <p> By default, a matcher uses opaque region boundaries.

1084      *

1085      * @return <tt>true</tt> iff this matcher is using transparent bounds,

1086      *         <tt>false</tt> otherwise.

1087      * @see java.util.regex.Matcher#useTransparentBounds(boolean)

1088      * @since 1.5

1089      */

1090     public boolean hasTransparentBounds() {

1091         return transparentBounds;

1092     }

1093 

1094     /**

1095      * Sets the transparency of region bounds for this matcher.

1096      *

1097      * <p> Invoking this method with an argument of <tt>true</tt> will set this

1098      * matcher to use <i>transparent</i> bounds. If the boolean

1099      * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used.

1100      *

1101      * <p> Using transparent bounds, the boundaries of this

1102      * matcher's region are transparent to lookahead, lookbehind,

1103      * and boundary matching constructs. Those constructs can see beyond the

1104      * boundaries of the region to see if a match is appropriate.

1105      *

1106      * <p> Using opaque bounds, the boundaries of this matcher's

1107      * region are opaque to lookahead, lookbehind, and boundary matching

1108      * constructs that may try to see beyond them. Those constructs cannot

1109      * look past the boundaries so they will fail to match anything outside

1110      * of the region.

1111      *

1112      * <p> By default, a matcher uses opaque bounds.

1113      *

1114      * @param  b <tt>true</tt> to use transparent bounds, <tt>false</tt> to use

1115      *         opaque bounds

1116      * @return this matcher

1117      * @see java.util.regex.Matcher#hasTransparentBounds

1118      * @since 1.5

1119      */

1120     public Matcher useTransparentBounds(boolean b) {

1121         transparentBounds = b;

1122         return this;

1123     }

1124 

1125     /**

1126      * Queries the anchoring of region bounds for this matcher.

1127      *

1128      * <p> This method returns <tt>true</tt> if this matcher uses

1129      * <i>anchoring</i> bounds, <tt>false</tt> otherwise.

1130      *

1131      * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a

1132      * description of anchoring bounds.

1133      *

1134      * <p> By default, a matcher uses anchoring region boundaries.

1135      *

1136      * @return <tt>true</tt> iff this matcher is using anchoring bounds,

1137      *         <tt>false</tt> otherwise.

1138      * @see java.util.regex.Matcher#useAnchoringBounds(boolean)

1139      * @since 1.5

1140      */

1141     public boolean hasAnchoringBounds() {

1142         return anchoringBounds;

1143     }

1144 

1145     /**

1146      * Sets the anchoring of region bounds for this matcher.

1147      *

1148      * <p> Invoking this method with an argument of <tt>true</tt> will set this

1149      * matcher to use <i>anchoring</i> bounds. If the boolean

1150      * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be

1151      * used.

1152      *

1153      * <p> Using anchoring bounds, the boundaries of this

1154      * matcher's region match anchors such as ^ and $.

1155      *

1156      * <p> Without anchoring bounds, the boundaries of this

1157      * matcher's region will not match anchors such as ^ and $.

1158      *

1159      * <p> By default, a matcher uses anchoring region boundaries.

1160      *

1161      * @param  b a boolean indicating whether or not to use anchoring bounds.

1162      * @return this matcher

1163      * @see java.util.regex.Matcher#hasAnchoringBounds

1164      * @since 1.5

1165      */

1166     public Matcher useAnchoringBounds(boolean b) {

1167         anchoringBounds = b;

1168         return this;

1169     }

1170 

1171     /**

1172      * <p>Returns the string representation of this matcher. The

1173      * string representation of a <code>Matcher</code> contains information

1174      * that may be useful for debugging. The exact format is unspecified.

1175      *

1176      * @return  The string representation of this matcher

1177      * @since 1.5

1178      */

1179     public String toString() {

1180         StringBuilder sb = new StringBuilder();

1181         sb.append("java.util.regex.Matcher");

1182         sb.append("[pattern=" + pattern());

1183         sb.append(" region=");

1184         sb.append(regionStart() + "," + regionEnd());

1185         sb.append(" lastmatch=");

1186         if ((first >= 0) && (group() != null)) {

1187             sb.append(group());

1188         }

1189         sb.append("]");

1190         return sb.toString();

1191     }

1192 

1193     /**

1194      * <p>Returns true if the end of input was hit by the search engine in

1195      * the last match operation performed by this matcher.

1196      *

1197      * <p>When this method returns true, then it is possible that more input

1198      * would have changed the result of the last search.

1199      *

1200      * @return  true iff the end of input was hit in the last match; false

1201      *          otherwise

1202      * @since 1.5

1203      */

1204     public boolean hitEnd() {

1205         return hitEnd;

1206     }

1207 

1208     /**

1209      * <p>Returns true if more input could change a positive match into a

1210      * negative one.

1211      *

1212      * <p>If this method returns true, and a match was found, then more

1213      * input could cause the match to be lost. If this method returns false

1214      * and a match was found, then more input might change the match but the

1215      * match won't be lost. If a match was not found, then requireEnd has no

1216      * meaning.

1217      *

1218      * @return  true iff more input could change a positive match into a

1219      *          negative one.

1220      * @since 1.5

1221      */

1222     public boolean requireEnd() {

1223         return requireEnd;

1224     }

1225 

1226     /**

1227      * Initiates a search to find a Pattern within the given bounds.

1228      * The groups are filled with default values and the match of the root

1229      * of the state machine is called. The state machine will hold the state

1230      * of the match as it proceeds in this matcher.

1231      *

1232      * Matcher.from is not set here, because it is the "hard" boundary

1233      * of the start of the search which anchors will set to. The from param

1234      * is the "soft" boundary of the start of the search, meaning that the

1235      * regex tries to match at that index but ^ won't match there. Subsequent

1236      * calls to the search methods start at a new "soft" boundary which is

1237      * the end of the previous match.

1238      */

1239     boolean search(int from) {

1240         this.hitEnd = false;

1241         this.requireEnd = false;

1242         from        = from < 0 ? 0 : from;

1243         this.first  = from;

1244         this.oldLast = oldLast < 0 ? from : oldLast;

1245         for (int i = 0; i < groups.length; i++)

1246             groups[i] = -1;

1247         acceptMode = NOANCHOR;

1248         boolean result = parentPattern.root.match(this, from, text);

1249         if (!result)

1250             this.first = -1;

1251         this.oldLast = this.last;

1252         return result;

1253     }

1254 

1255     /**

1256      * Initiates a search for an anchored match to a Pattern within the given

1257      * bounds. The groups are filled with default values and the match of the

1258      * root of the state machine is called. The state machine will hold the

1259      * state of the match as it proceeds in this matcher.

1260      */

1261     boolean match(int from, int anchor) {

1262         this.hitEnd = false;

1263         this.requireEnd = false;

1264         from        = from < 0 ? 0 : from;

1265         this.first  = from;

1266         this.oldLast = oldLast < 0 ? from : oldLast;

1267         for (int i = 0; i < groups.length; i++)

1268             groups[i] = -1;

1269         acceptMode = anchor;

1270         boolean result = parentPattern.matchRoot.match(this, from, text);

1271         if (!result)

1272             this.first = -1;

1273         this.oldLast = this.last;

1274         return result;

1275     }

1276 

1277     /**

1278      * Returns the end index of the text.

1279      *

1280      * @return the index after the last character in the text

1281      */

1282     int getTextLength() {

1283         return text.length();

1284     }

1285 

1286     /**

1287      * Generates a CharSequence from this Matcher's input in the specified range.

1288      *

1289      * @param  beginIndex   the beginning index, inclusive

1290      * @param  endIndex     the ending index, exclusive

1291      * @return A CharSequence generated from this Matcher's input

1292      */

1293     CharSequence getSubSequence(int beginIndex, int endIndex) {

1294         return text.subSequence(beginIndex, endIndex);

1295     }

1296 

1297     /**

1298      * Returns this Matcher's input character at index i.

1299      *

1300      * @return A char from the specified index

1301      */

1302     char charAt(int i) {

1303         return text.charAt(i);

1304     }

1305 

1306     /**

1307      * Returns the group index of the matched capturing group.

1308      *

1309      * @return the index of the named-capturing group

1310      */

1311     int getMatchedGroupIndex(String name) {

1312         Objects.requireNonNull(name, "Group name");

1313         if (first < 0)

1314             throw new IllegalStateException("No match found");

1315         if (!parentPattern.namedGroups().containsKey(name))

1316             throw new IllegalArgumentException("No group with name <" + name + ">");

1317         return parentPattern.namedGroups().get(name);

1318     }

1319 }

1320