1 /*
2 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
3 * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
4 *
5 *
6 *
7 *
8 *
9 *
10 *
11 *
12 *
13 *
14 *
15 *
16 *
17 *
18 *
19 *
20 *
21 *
22 *
23 *
24 */
25
26 // -- This file was mechanically generated: Do not edit! -- //
27
28 package java.nio.charset;
29
30 import java.nio.Buffer;
31 import java.nio.ByteBuffer;
32 import java.nio.CharBuffer;
33 import java.nio.BufferOverflowException;
34 import java.nio.BufferUnderflowException;
35 import java.lang.ref.WeakReference;
36 import java.nio.charset.CoderMalfunctionError; // javadoc
37 import java.util.Arrays;
38
39
40 /**
41 * An engine that can transform a sequence of sixteen-bit Unicode characters into a sequence of
42 * bytes in a specific charset.
43 *
44 * <a name="steps"></a>
45 *
46 * <p> The input character sequence is provided in a character buffer or a series
47 * of such buffers. The output byte sequence is written to a byte buffer
48 * or a series of such buffers. An encoder should always be used by making
49 * the following sequence of method invocations, hereinafter referred to as an
50 * <i>encoding operation</i>:
51 *
52 * <ol>
53 *
54 * <li><p> Reset the encoder via the {@link #reset reset} method, unless it
55 * has not been used before; </p></li>
56 *
57 * <li><p> Invoke the {@link #encode encode} method zero or more times, as
58 * long as additional input may be available, passing <tt>false</tt> for the
59 * <tt>endOfInput</tt> argument and filling the input buffer and flushing the
60 * output buffer between invocations; </p></li>
61 *
62 * <li><p> Invoke the {@link #encode encode} method one final time, passing
63 * <tt>true</tt> for the <tt>endOfInput</tt> argument; and then </p></li>
64 *
65 * <li><p> Invoke the {@link #flush flush} method so that the encoder can
66 * flush any internal state to the output buffer. </p></li>
67 *
68 * </ol>
69 *
70 * Each invocation of the {@link #encode encode} method will encode as many
71 * characters as possible from the input buffer, writing the resulting bytes
72 * to the output buffer. The {@link #encode encode} method returns when more
73 * input is required, when there is not enough room in the output buffer, or
74 * when an encoding error has occurred. In each case a {@link CoderResult}
75 * object is returned to describe the reason for termination. An invoker can
76 * examine this object and fill the input buffer, flush the output buffer, or
77 * attempt to recover from an encoding error, as appropriate, and try again.
78 *
79 * <a name="ce"></a>
80 *
81 * <p> There are two general types of encoding errors. If the input character
82 * sequence is not a legal sixteen-bit Unicode sequence then the input is considered <i>malformed</i>. If
83 * the input character sequence is legal but cannot be mapped to a valid
84 * byte sequence in the given charset then an <i>unmappable character</i> has been encountered.
85 *
86 * <a name="cae"></a>
87 *
88 * <p> How an encoding error is handled depends upon the action requested for
89 * that type of error, which is described by an instance of the {@link
90 * CodingErrorAction} class. The possible error actions are to {@linkplain
91 * CodingErrorAction#IGNORE ignore} the erroneous input, {@linkplain
92 * CodingErrorAction#REPORT report} the error to the invoker via
93 * the returned {@link CoderResult} object, or {@linkplain CodingErrorAction#REPLACE
94 * replace} the erroneous input with the current value of the
 * replacement byte array.  The replacement
 * is initially set to the encoder's default replacement, which often
 * (but not always) has the initial value <tt>{</tt> <tt>(byte)'?'</tt> <tt>}</tt>;
 * its value may be changed via the {@link #replaceWith(byte[])
 * replaceWith} method.
107 *
108 * <p> The default action for malformed-input and unmappable-character errors
109 * is to {@linkplain CodingErrorAction#REPORT report} them. The
110 * malformed-input error action may be changed via the {@link
111 * #onMalformedInput(CodingErrorAction) onMalformedInput} method; the
112 * unmappable-character action may be changed via the {@link
113 * #onUnmappableCharacter(CodingErrorAction) onUnmappableCharacter} method.
114 *
115 * <p> This class is designed to handle many of the details of the encoding
116 * process, including the implementation of error actions. An encoder for a
117 * specific charset, which is a concrete subclass of this class, need only
118 * implement the abstract {@link #encodeLoop encodeLoop} method, which
119 * encapsulates the basic encoding loop. A subclass that maintains internal
120 * state should, additionally, override the {@link #implFlush implFlush} and
121 * {@link #implReset implReset} methods.
122 *
123 * <p> Instances of this class are not safe for use by multiple concurrent
124 * threads. </p>
125 *
126 *
127 * @author Mark Reinhold
128 * @author JSR-51 Expert Group
129 * @since 1.4
130 *
131 * @see ByteBuffer
132 * @see CharBuffer
133 * @see Charset
134 * @see CharsetDecoder
135 */
136
137 public abstract class CharsetEncoder {
138
139 private final Charset charset;
140 private final float averageBytesPerChar;
141 private final float maxBytesPerChar;
142
143 private byte[] replacement;
144 private CodingErrorAction malformedInputAction
145 = CodingErrorAction.REPORT;
146 private CodingErrorAction unmappableCharacterAction
147 = CodingErrorAction.REPORT;
148
149 // Internal states
150 //
151 private static final int ST_RESET = 0;
152 private static final int ST_CODING = 1;
153 private static final int ST_END = 2;
154 private static final int ST_FLUSHED = 3;
155
156 private int state = ST_RESET;
157
158 private static String stateNames[]
159 = { "RESET", "CODING", "CODING_END", "FLUSHED" };
160
161
162 /**
163 * Initializes a new encoder. The new encoder will have the given
164 * bytes-per-char and replacement values.
165 *
166 * @param cs
167 * The charset that created this encoder
168 *
169 * @param averageBytesPerChar
170 * A positive float value indicating the expected number of
171 * bytes that will be produced for each input character
172 *
173 * @param maxBytesPerChar
174 * A positive float value indicating the maximum number of
175 * bytes that will be produced for each input character
176 *
177 * @param replacement
178 * The initial replacement; must not be <tt>null</tt>, must have
179 * non-zero length, must not be longer than maxBytesPerChar,
180 * and must be {@linkplain #isLegalReplacement legal}
181 *
182 * @throws IllegalArgumentException
183 * If the preconditions on the parameters do not hold
184 */
protected CharsetEncoder(Charset cs,
                         float averageBytesPerChar,
                         float maxBytesPerChar,
                         byte[] replacement)
{
    this.charset = cs;

    // Both ratios must be strictly positive
    if (averageBytesPerChar <= 0.0f) {
        throw new IllegalArgumentException("Non-positive averageBytesPerChar");
    }
    if (maxBytesPerChar <= 0.0f) {
        throw new IllegalArgumentException("Non-positive maxBytesPerChar");
    }

    // The average may not exceed the maximum, except when running at the
    // 1.4 bug-compatibility level, where this check is skipped
    if (!Charset.atBugLevel("1.4") && (averageBytesPerChar > maxBytesPerChar)) {
        throw new IllegalArgumentException(
            "averageBytesPerChar exceeds maxBytesPerChar");
    }

    this.averageBytesPerChar = averageBytesPerChar;
    this.maxBytesPerChar = maxBytesPerChar;
    this.replacement = replacement;

    // replaceWith compares the length against maxBytesPerChar, so that
    // field has to be assigned before this call
    replaceWith(replacement);
}
209
210 /**
211 * Initializes a new encoder. The new encoder will have the given
212 * bytes-per-char values and its replacement will be the
213 * byte array <tt>{</tt> <tt>(byte)'?'</tt> <tt>}</tt>.
214 *
215 * @param cs
216 * The charset that created this encoder
217 *
218 * @param averageBytesPerChar
219 * A positive float value indicating the expected number of
220 * bytes that will be produced for each input character
221 *
222 * @param maxBytesPerChar
223 * A positive float value indicating the maximum number of
224 * bytes that will be produced for each input character
225 *
226 * @throws IllegalArgumentException
227 * If the preconditions on the parameters do not hold
228 */
protected CharsetEncoder(Charset cs,
                         float averageBytesPerChar,
                         float maxBytesPerChar)
{
    // Delegate to the four-argument constructor with the one-byte
    // replacement { '?' }
    this(cs, averageBytesPerChar, maxBytesPerChar, new byte[] { (byte) '?' });
}
237
238 /**
239 * Returns the charset that created this encoder.
240 *
241 * @return This encoder's charset
242 */
243 public final Charset charset() {
244 return charset;
245 }
246
247 /**
248 * Returns this encoder's replacement value.
249 *
250 * @return This encoder's current replacement,
251 * which is never <tt>null</tt> and is never empty
252 */
253 public final byte[] replacement() {
254
255
256
257
258 return Arrays.copyOf(replacement, replacement.length);
259
260 }
261
262 /**
263 * Changes this encoder's replacement value.
264 *
265 * <p> This method invokes the {@link #implReplaceWith implReplaceWith}
266 * method, passing the new replacement, after checking that the new
267 * replacement is acceptable. </p>
268 *
 * @param  newReplacement
 *         The new replacement value; must not be <tt>null</tt>, must have
 *         non-zero length, must not be longer than the value returned by
 *         the {@link #maxBytesPerChar() maxBytesPerChar} method, and
 *         must be {@link #isLegalReplacement legal}
 *
282 * @return This encoder
283 *
284 * @throws IllegalArgumentException
285 * If the preconditions on the parameter do not hold
286 */
287 public final CharsetEncoder replaceWith(byte[] newReplacement) {
288 if (newReplacement == null)
289 throw new IllegalArgumentException("Null replacement");
290 int len = newReplacement.length;
291 if (len == 0)
292 throw new IllegalArgumentException("Empty replacement");
293 if (len > maxBytesPerChar)
294 throw new IllegalArgumentException("Replacement too long");
295
296
297
298
299 if (!isLegalReplacement(newReplacement))
300 throw new IllegalArgumentException("Illegal replacement");
301 this.replacement = Arrays.copyOf(newReplacement, newReplacement.length);
302
303 implReplaceWith(this.replacement);
304 return this;
305 }
306
307 /**
308 * Reports a change to this encoder's replacement value.
309 *
310 * <p> The default implementation of this method does nothing. This method
311 * should be overridden by encoders that require notification of changes to
312 * the replacement. </p>
313 *
314 * @param newReplacement The replacement value
315 */
316 protected void implReplaceWith(byte[] newReplacement) {
317 }
318
319
320
321 private WeakReference<CharsetDecoder> cachedDecoder = null;
322
323 /**
324 * Tells whether or not the given byte array is a legal replacement value
325 * for this encoder.
326 *
327 * <p> A replacement is legal if, and only if, it is a legal sequence of
328 * bytes in this encoder's charset; that is, it must be possible to decode
329 * the replacement into one or more sixteen-bit Unicode characters.
330 *
331 * <p> The default implementation of this method is not very efficient; it
332 * should generally be overridden to improve performance. </p>
333 *
334 * @param repl The byte array to be tested
335 *
336 * @return <tt>true</tt> if, and only if, the given byte array
337 * is a legal replacement value for this encoder
338 */
339 public boolean isLegalReplacement(byte[] repl) {
340 WeakReference<CharsetDecoder> wr = cachedDecoder;
341 CharsetDecoder dec = null;
342 if ((wr == null) || ((dec = wr.get()) == null)) {
343 dec = charset().newDecoder();
344 dec.onMalformedInput(CodingErrorAction.REPORT);
345 dec.onUnmappableCharacter(CodingErrorAction.REPORT);
346 cachedDecoder = new WeakReference<CharsetDecoder>(dec);
347 } else {
348 dec.reset();
349 }
350 ByteBuffer bb = ByteBuffer.wrap(repl);
351 CharBuffer cb = CharBuffer.allocate((int)(bb.remaining()
352 * dec.maxCharsPerByte()));
353 CoderResult cr = dec.decode(bb, cb, true);
354 return !cr.isError();
355 }
356
357
358
359 /**
360 * Returns this encoder's current action for malformed-input errors.
361 *
362 * @return The current malformed-input action, which is never <tt>null</tt>
363 */
364 public CodingErrorAction malformedInputAction() {
365 return malformedInputAction;
366 }
367
368 /**
369 * Changes this encoder's action for malformed-input errors.
370 *
371 * <p> This method invokes the {@link #implOnMalformedInput
372 * implOnMalformedInput} method, passing the new action. </p>
373 *
374 * @param newAction The new action; must not be <tt>null</tt>
375 *
376 * @return This encoder
377 *
378 * @throws IllegalArgumentException
379 * If the precondition on the parameter does not hold
380 */
381 public final CharsetEncoder onMalformedInput(CodingErrorAction newAction) {
382 if (newAction == null)
383 throw new IllegalArgumentException("Null action");
384 malformedInputAction = newAction;
385 implOnMalformedInput(newAction);
386 return this;
387 }
388
389 /**
390 * Reports a change to this encoder's malformed-input action.
391 *
392 * <p> The default implementation of this method does nothing. This method
393 * should be overridden by encoders that require notification of changes to
394 * the malformed-input action. </p>
395 *
396 * @param newAction The new action
397 */
398 protected void implOnMalformedInput(CodingErrorAction newAction) { }
399
400 /**
401 * Returns this encoder's current action for unmappable-character errors.
402 *
403 * @return The current unmappable-character action, which is never
404 * <tt>null</tt>
405 */
406 public CodingErrorAction unmappableCharacterAction() {
407 return unmappableCharacterAction;
408 }
409
410 /**
411 * Changes this encoder's action for unmappable-character errors.
412 *
413 * <p> This method invokes the {@link #implOnUnmappableCharacter
414 * implOnUnmappableCharacter} method, passing the new action. </p>
415 *
416 * @param newAction The new action; must not be <tt>null</tt>
417 *
418 * @return This encoder
419 *
420 * @throws IllegalArgumentException
421 * If the precondition on the parameter does not hold
422 */
423 public final CharsetEncoder onUnmappableCharacter(CodingErrorAction
424 newAction)
425 {
426 if (newAction == null)
427 throw new IllegalArgumentException("Null action");
428 unmappableCharacterAction = newAction;
429 implOnUnmappableCharacter(newAction);
430 return this;
431 }
432
433 /**
434 * Reports a change to this encoder's unmappable-character action.
435 *
436 * <p> The default implementation of this method does nothing. This method
437 * should be overridden by encoders that require notification of changes to
438 * the unmappable-character action. </p>
439 *
440 * @param newAction The new action
441 */
442 protected void implOnUnmappableCharacter(CodingErrorAction newAction) { }
443
444 /**
445 * Returns the average number of bytes that will be produced for each
446 * character of input. This heuristic value may be used to estimate the size
447 * of the output buffer required for a given input sequence.
448 *
449 * @return The average number of bytes produced
450 * per character of input
451 */
452 public final float averageBytesPerChar() {
453 return averageBytesPerChar;
454 }
455
456 /**
457 * Returns the maximum number of bytes that will be produced for each
458 * character of input. This value may be used to compute the worst-case size
459 * of the output buffer required for a given input sequence.
460 *
461 * @return The maximum number of bytes that will be produced per
462 * character of input
463 */
464 public final float maxBytesPerChar() {
465 return maxBytesPerChar;
466 }
467
468 /**
469 * Encodes as many characters as possible from the given input buffer,
470 * writing the results to the given output buffer.
471 *
472 * <p> The buffers are read from, and written to, starting at their current
473 * positions. At most {@link Buffer#remaining in.remaining()} characters
474 * will be read and at most {@link Buffer#remaining out.remaining()}
475 * bytes will be written. The buffers' positions will be advanced to
476 * reflect the characters read and the bytes written, but their marks and
477 * limits will not be modified.
478 *
479 * <p> In addition to reading characters from the input buffer and writing
480 * bytes to the output buffer, this method returns a {@link CoderResult}
481 * object to describe its reason for termination:
482 *
483 * <ul>
484 *
485 * <li><p> {@link CoderResult#UNDERFLOW} indicates that as much of the
486 * input buffer as possible has been encoded. If there is no further
487 * input then the invoker can proceed to the next step of the
488 * <a href="#steps">encoding operation</a>. Otherwise this method
489 * should be invoked again with further input. </p></li>
490 *
491 * <li><p> {@link CoderResult#OVERFLOW} indicates that there is
492 * insufficient space in the output buffer to encode any more characters.
493 * This method should be invoked again with an output buffer that has
494 * more {@linkplain Buffer#remaining remaining} bytes. This is
495 * typically done by draining any encoded bytes from the output
496 * buffer. </p></li>
497 *
498 * <li><p> A {@linkplain CoderResult#malformedForLength
499 * malformed-input} result indicates that a malformed-input
500 * error has been detected. The malformed characters begin at the input
501 * buffer's (possibly incremented) position; the number of malformed
502 * characters may be determined by invoking the result object's {@link
503 * CoderResult#length() length} method. This case applies only if the
504 * {@linkplain #onMalformedInput malformed action} of this encoder
505 * is {@link CodingErrorAction#REPORT}; otherwise the malformed input
506 * will be ignored or replaced, as requested. </p></li>
507 *
508 * <li><p> An {@linkplain CoderResult#unmappableForLength
509 * unmappable-character} result indicates that an
510 * unmappable-character error has been detected. The characters that
511 * encode the unmappable character begin at the input buffer's (possibly
512 * incremented) position; the number of such characters may be determined
513 * by invoking the result object's {@link CoderResult#length() length}
514 * method. This case applies only if the {@linkplain #onUnmappableCharacter
515 * unmappable action} of this encoder is {@link
516 * CodingErrorAction#REPORT}; otherwise the unmappable character will be
517 * ignored or replaced, as requested. </p></li>
518 *
519 * </ul>
520 *
521 * In any case, if this method is to be reinvoked in the same encoding
522 * operation then care should be taken to preserve any characters remaining
523 * in the input buffer so that they are available to the next invocation.
524 *
525 * <p> The <tt>endOfInput</tt> parameter advises this method as to whether
526 * the invoker can provide further input beyond that contained in the given
527 * input buffer. If there is a possibility of providing additional input
528 * then the invoker should pass <tt>false</tt> for this parameter; if there
529 * is no possibility of providing further input then the invoker should
530 * pass <tt>true</tt>. It is not erroneous, and in fact it is quite
531 * common, to pass <tt>false</tt> in one invocation and later discover that
532 * no further input was actually available. It is critical, however, that
533 * the final invocation of this method in a sequence of invocations always
534 * pass <tt>true</tt> so that any remaining unencoded input will be treated
535 * as being malformed.
536 *
537 * <p> This method works by invoking the {@link #encodeLoop encodeLoop}
538 * method, interpreting its results, handling error conditions, and
539 * reinvoking it as necessary. </p>
540 *
541 *
542 * @param in
543 * The input character buffer
544 *
545 * @param out
546 * The output byte buffer
547 *
548 * @param endOfInput
549 * <tt>true</tt> if, and only if, the invoker can provide no
550 * additional input characters beyond those in the given buffer
551 *
552 * @return A coder-result object describing the reason for termination
553 *
554 * @throws IllegalStateException
555 * If an encoding operation is already in progress and the previous
556 * step was an invocation neither of the {@link #reset reset}
557 * method, nor of this method with a value of <tt>false</tt> for
558 * the <tt>endOfInput</tt> parameter, nor of this method with a
559 * value of <tt>true</tt> for the <tt>endOfInput</tt> parameter
560 * but a return value indicating an incomplete encoding operation
561 *
562 * @throws CoderMalfunctionError
563 * If an invocation of the encodeLoop method threw
564 * an unexpected exception
565 */
566 public final CoderResult encode(CharBuffer in, ByteBuffer out,
567 boolean endOfInput)
568 {
569 int newState = endOfInput ? ST_END : ST_CODING;
570 if ((state != ST_RESET) && (state != ST_CODING)
571 && !(endOfInput && (state == ST_END)))
572 throwIllegalStateException(state, newState);
573 state = newState;
574
575 for (;;) {
576
577 CoderResult cr;
578 try {
579 cr = encodeLoop(in, out);
580 } catch (BufferUnderflowException x) {
581 throw new CoderMalfunctionError(x);
582 } catch (BufferOverflowException x) {
583 throw new CoderMalfunctionError(x);
584 }
585
586 if (cr.isOverflow())
587 return cr;
588
589 if (cr.isUnderflow()) {
590 if (endOfInput && in.hasRemaining()) {
591 cr = CoderResult.malformedForLength(in.remaining());
592 // Fall through to malformed-input case
593 } else {
594 return cr;
595 }
596 }
597
598 CodingErrorAction action = null;
599 if (cr.isMalformed())
600 action = malformedInputAction;
601 else if (cr.isUnmappable())
602 action = unmappableCharacterAction;
603 else
604 assert false : cr.toString();
605
606 if (action == CodingErrorAction.REPORT)
607 return cr;
608
609 if (action == CodingErrorAction.REPLACE) {
610 if (out.remaining() < replacement.length)
611 return CoderResult.OVERFLOW;
612 out.put(replacement);
613 }
614
615 if ((action == CodingErrorAction.IGNORE)
616 || (action == CodingErrorAction.REPLACE)) {
617 // Skip erroneous input either way
618 in.position(in.position() + cr.length());
619 continue;
620 }
621
622 assert false;
623 }
624
625 }
626
627 /**
628 * Flushes this encoder.
629 *
630 * <p> Some encoders maintain internal state and may need to write some
631 * final bytes to the output buffer once the overall input sequence has
632 * been read.
633 *
634 * <p> Any additional output is written to the output buffer beginning at
635 * its current position. At most {@link Buffer#remaining out.remaining()}
636 * bytes will be written. The buffer's position will be advanced
637 * appropriately, but its mark and limit will not be modified.
638 *
639 * <p> If this method completes successfully then it returns {@link
640 * CoderResult#UNDERFLOW}. If there is insufficient room in the output
641 * buffer then it returns {@link CoderResult#OVERFLOW}. If this happens
642 * then this method must be invoked again, with an output buffer that has
643 * more room, in order to complete the current <a href="#steps">encoding
644 * operation</a>.
645 *
646 * <p> If this encoder has already been flushed then invoking this method
647 * has no effect.
648 *
649 * <p> This method invokes the {@link #implFlush implFlush} method to
650 * perform the actual flushing operation. </p>
651 *
652 * @param out
653 * The output byte buffer
654 *
655 * @return A coder-result object, either {@link CoderResult#UNDERFLOW} or
656 * {@link CoderResult#OVERFLOW}
657 *
658 * @throws IllegalStateException
659 * If the previous step of the current encoding operation was an
660 * invocation neither of the {@link #flush flush} method nor of
661 * the three-argument {@link
662 * #encode(CharBuffer,ByteBuffer,boolean) encode} method
663 * with a value of <tt>true</tt> for the <tt>endOfInput</tt>
664 * parameter
665 */
666 public final CoderResult flush(ByteBuffer out) {
667 if (state == ST_END) {
668 CoderResult cr = implFlush(out);
669 if (cr.isUnderflow())
670 state = ST_FLUSHED;
671 return cr;
672 }
673
674 if (state != ST_FLUSHED)
675 throwIllegalStateException(state, ST_FLUSHED);
676
677 return CoderResult.UNDERFLOW; // Already flushed
678 }
679
680 /**
681 * Flushes this encoder.
682 *
683 * <p> The default implementation of this method does nothing, and always
684 * returns {@link CoderResult#UNDERFLOW}. This method should be overridden
685 * by encoders that may need to write final bytes to the output buffer
686 * once the entire input sequence has been read. </p>
687 *
688 * @param out
689 * The output byte buffer
690 *
691 * @return A coder-result object, either {@link CoderResult#UNDERFLOW} or
692 * {@link CoderResult#OVERFLOW}
693 */
694 protected CoderResult implFlush(ByteBuffer out) {
695 return CoderResult.UNDERFLOW;
696 }
697
698 /**
699 * Resets this encoder, clearing any internal state.
700 *
701 * <p> This method resets charset-independent state and also invokes the
702 * {@link #implReset() implReset} method in order to perform any
703 * charset-specific reset actions. </p>
704 *
705 * @return This encoder
706 *
707 */
708 public final CharsetEncoder reset() {
709 implReset();
710 state = ST_RESET;
711 return this;
712 }
713
714 /**
715 * Resets this encoder, clearing any charset-specific internal state.
716 *
717 * <p> The default implementation of this method does nothing. This method
718 * should be overridden by encoders that maintain internal state. </p>
719 */
720 protected void implReset() { }
721
722 /**
723 * Encodes one or more characters into one or more bytes.
724 *
725 * <p> This method encapsulates the basic encoding loop, encoding as many
726 * characters as possible until it either runs out of input, runs out of room
727 * in the output buffer, or encounters an encoding error. This method is
728 * invoked by the {@link #encode encode} method, which handles result
729 * interpretation and error recovery.
730 *
731 * <p> The buffers are read from, and written to, starting at their current
732 * positions. At most {@link Buffer#remaining in.remaining()} characters
733 * will be read, and at most {@link Buffer#remaining out.remaining()}
734 * bytes will be written. The buffers' positions will be advanced to
735 * reflect the characters read and the bytes written, but their marks and
736 * limits will not be modified.
737 *
738 * <p> This method returns a {@link CoderResult} object to describe its
739 * reason for termination, in the same manner as the {@link #encode encode}
740 * method. Most implementations of this method will handle encoding errors
741 * by returning an appropriate result object for interpretation by the
742 * {@link #encode encode} method. An optimized implementation may instead
743 * examine the relevant error action and implement that action itself.
744 *
745 * <p> An implementation of this method may perform arbitrary lookahead by
746 * returning {@link CoderResult#UNDERFLOW} until it receives sufficient
747 * input. </p>
748 *
749 * @param in
750 * The input character buffer
751 *
752 * @param out
753 * The output byte buffer
754 *
755 * @return A coder-result object describing the reason for termination
756 */
757 protected abstract CoderResult encodeLoop(CharBuffer in,
758 ByteBuffer out);
759
760 /**
761 * Convenience method that encodes the remaining content of a single input
762 * character buffer into a newly-allocated byte buffer.
763 *
764 * <p> This method implements an entire <a href="#steps">encoding
765 * operation</a>; that is, it resets this encoder, then it encodes the
766 * characters in the given character buffer, and finally it flushes this
767 * encoder. This method should therefore not be invoked if an encoding
768 * operation is already in progress. </p>
769 *
770 * @param in
771 * The input character buffer
772 *
773 * @return A newly-allocated byte buffer containing the result of the
774 * encoding operation. The buffer's position will be zero and its
775 * limit will follow the last byte written.
776 *
777 * @throws IllegalStateException
778 * If an encoding operation is already in progress
779 *
780 * @throws MalformedInputException
781 * If the character sequence starting at the input buffer's current
782 * position is not a legal sixteen-bit Unicode sequence and the current malformed-input action
783 * is {@link CodingErrorAction#REPORT}
784 *
785 * @throws UnmappableCharacterException
786 * If the character sequence starting at the input buffer's current
787 * position cannot be mapped to an equivalent byte sequence and
788 * the current unmappable-character action is {@link
789 * CodingErrorAction#REPORT}
790 */
791 public final ByteBuffer encode(CharBuffer in)
792 throws CharacterCodingException
793 {
794 int n = (int)(in.remaining() * averageBytesPerChar());
795 ByteBuffer out = ByteBuffer.allocate(n);
796
797 if ((n == 0) && (in.remaining() == 0))
798 return out;
799 reset();
800 for (;;) {
801 CoderResult cr = in.hasRemaining() ?
802 encode(in, out, true) : CoderResult.UNDERFLOW;
803 if (cr.isUnderflow())
804 cr = flush(out);
805
806 if (cr.isUnderflow())
807 break;
808 if (cr.isOverflow()) {
809 n = 2*n + 1; // Ensure progress; n might be 0!
810 ByteBuffer o = ByteBuffer.allocate(n);
811 out.flip();
812 o.put(out);
813 out = o;
814 continue;
815 }
816 cr.throwException();
817 }
818 out.flip();
819 return out;
820 }
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900 private boolean canEncode(CharBuffer cb) {
901 if (state == ST_FLUSHED)
902 reset();
903 else if (state != ST_RESET)
904 throwIllegalStateException(state, ST_CODING);
905 CodingErrorAction ma = malformedInputAction();
906 CodingErrorAction ua = unmappableCharacterAction();
907 try {
908 onMalformedInput(CodingErrorAction.REPORT);
909 onUnmappableCharacter(CodingErrorAction.REPORT);
910 encode(cb);
911 } catch (CharacterCodingException x) {
912 return false;
913 } finally {
914 onMalformedInput(ma);
915 onUnmappableCharacter(ua);
916 reset();
917 }
918 return true;
919 }
920
921 /**
922 * Tells whether or not this encoder can encode the given character.
923 *
924 * <p> This method returns <tt>false</tt> if the given character is a
925 * surrogate character; such characters can be interpreted only when they
926 * are members of a pair consisting of a high surrogate followed by a low
927 * surrogate. The {@link #canEncode(java.lang.CharSequence)
928 * canEncode(CharSequence)} method may be used to test whether or not a
929 * character sequence can be encoded.
930 *
931 * <p> This method may modify this encoder's state; it should therefore not
932 * be invoked if an <a href="#steps">encoding operation</a> is already in
933 * progress.
934 *
935 * <p> The default implementation of this method is not very efficient; it
936 * should generally be overridden to improve performance. </p>
937 *
938 * @param c
939 * The given character
940 *
941 * @return <tt>true</tt> if, and only if, this encoder can encode
942 * the given character
943 *
944 * @throws IllegalStateException
945 * If an encoding operation is already in progress
946 */
947 public boolean canEncode(char c) {
948 CharBuffer cb = CharBuffer.allocate(1);
949 cb.put(c);
950 cb.flip();
951 return canEncode(cb);
952 }
953
954 /**
955 * Tells whether or not this encoder can encode the given character
956 * sequence.
957 *
958 * <p> If this method returns <tt>false</tt> for a particular character
959 * sequence then more information about why the sequence cannot be encoded
960 * may be obtained by performing a full <a href="#steps">encoding
961 * operation</a>.
962 *
963 * <p> This method may modify this encoder's state; it should therefore not
964 * be invoked if an encoding operation is already in progress.
965 *
966 * <p> The default implementation of this method is not very efficient; it
967 * should generally be overridden to improve performance. </p>
968 *
969 * @param cs
970 * The given character sequence
971 *
 * @return  <tt>true</tt> if, and only if, this encoder can encode
 *          the given character sequence without throwing any exceptions
 *          and without performing any replacements
975 *
976 * @throws IllegalStateException
977 * If an encoding operation is already in progress
978 */
979 public boolean canEncode(CharSequence cs) {
980 CharBuffer cb;
981 if (cs instanceof CharBuffer)
982 cb = ((CharBuffer)cs).duplicate();
983 else
984 cb = CharBuffer.wrap(cs.toString());
985 return canEncode(cb);
986 }
987
988
989
990
991 private void throwIllegalStateException(int from, int to) {
992 throw new IllegalStateException("Current state = " + stateNames[from]
993 + ", new state = " + stateNames[to]);
994 }
995
996 }
997