001 /**
002
003 * ========================================
004
005 * JFreeReport : a free Java report library
006
007 * ========================================
008
009 *
010
011 * Project Info: http://reporting.pentaho.org/
012
013 *
014
015 * (C) Copyright 2000-2007, by Object Refinery Limited, Pentaho Corporation and Contributors.
016
017 *
018
019 * This library is free software; you can redistribute it and/or modify it under the terms
020
021 * of the GNU Lesser General Public License as published by the Free Software Foundation;
022
023 * either version 2.1 of the License, or (at your option) any later version.
024
025 *
026
027 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
028
029 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
030
031 * See the GNU Lesser General Public License for more details.
032
033 *
034
035 * You should have received a copy of the GNU Lesser General Public License along with this
036
037 * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
038
039 * Boston, MA 02111-1307, USA.
040
041 *
042
043 * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
044
045 * in the United States and other countries.]
046
047 *
048
049 * ------------
050
051 * $Id: CharacterEntityParser.java,v 1.12 2007/04/01 18:49:33 taqua Exp $
052
053 * ------------
054
055 * (C) Copyright 2000-2005, by Object Refinery Limited.
056
057 * (C) Copyright 2005-2007, by Pentaho Corporation.
058
059 */
060
061 package org.jfree.report.util;
062
063
064
065 import java.util.Enumeration;
066
067 import java.util.Properties;
068
069
070
/**
 * Translates between plain characters and entity references of the form
 * <code>&amp;entityname;</code>.
 * <p>
 * Encoding replaces every single character that has a known entity by its named reference;
 * decoding resolves named references as well as numeric character references in decimal
 * (<code>&amp;#65;</code>) and hexadecimal (<code>&amp;#x41;</code>) notation.
 *
 * @author Thomas Morgner
 */
public class CharacterEntityParser
{
  /**
   * The largest character code that can be stored in a single Java <code>char</code>.
   */
  private static final int MAX_CHAR_CODE = 0xFFFF;

  /**
   * The entities, keyed by entity name. The value is the replacement text.
   */
  private final Properties entities;

  /**
   * The reverse lookup table, keyed by replacement text. The value is the entity name.
   */
  private final Properties reverse;

  /**
   * Creates a new CharacterEntityParser and initializes the parser with the given set of
   * entities.
   * <p>
   * The given properties object is stored as-is (it is not copied). The reverse lookup
   * table is computed once, here, from the keys the properties object enumerates at
   * construction time.
   *
   * @param characterEntities the entities used for the parser, keyed by entity name.
   * @throws NullPointerException if <code>characterEntities</code> is <code>null</code>.
   */
  public CharacterEntityParser (final Properties characterEntities)
  {
    if (characterEntities == null)
    {
      throw new NullPointerException("characterEntities must not be null");
    }
    entities = characterEntities;
    reverse = new Properties();

    final Enumeration names = entities.keys();
    while (names.hasMoreElements())
    {
      final String name = (String) names.nextElement();
      final String replacement = entities.getProperty(name);
      reverse.setProperty(replacement, name);
    }
  }

  /**
   * Creates a new character entity parser that knows the five entities predefined by the
   * XML standard: <code>amp</code>, <code>quot</code>, <code>lt</code>, <code>gt</code>
   * and <code>apos</code>.
   *
   * @return the CharacterEntityParser initialized with the XML entities.
   */
  public static CharacterEntityParser createXMLEntityParser ()
  {
    final Properties xmlEntities = new Properties();
    xmlEntities.setProperty("amp", "&");
    xmlEntities.setProperty("quot", "\"");
    xmlEntities.setProperty("lt", "<");
    xmlEntities.setProperty("gt", ">");
    xmlEntities.setProperty("apos", "\u0027");
    return new CharacterEntityParser(xmlEntities);
  }

  /**
   * Looks up the replacement text for the entity name specified in <code>key</code>.
   *
   * @param key the entity name, without the leading '&amp;' and the trailing ';'.
   * @return the replacement text, or <code>null</code> if the entity is not known.
   */
  private String lookupCharacter (final String key)
  {
    return entities.getProperty(key);
  }

  /**
   * Performs a reverse lookup to retrieve the complete entity reference for a character.
   *
   * @param character the character that should be translated into an entity reference.
   * @return the reference including the leading '&amp;' and the trailing ';', or
   *         <code>null</code> if no entity is defined for the character.
   */
  private String lookupEntity (final String character)
  {
    final String name = reverse.getProperty(character);
    if (name == null)
    {
      return null;
    }
    return "&" + name + ";";
  }

  /**
   * Parses the body of a numeric character reference, that is the text between
   * <code>&amp;#</code> and <code>;</code>. A leading <code>x</code> selects hexadecimal
   * notation, everything else is read as a decimal number.
   *
   * @param digits the text following the '#' sign.
   * @return the parsed character code, or <code>0</code> if the text is not a number.
   *         Zero is never a valid character code for the caller, so it doubles as the
   *         "invalid" marker.
   */
  private static int parseCharacterCode (final String digits)
  {
    try
    {
      if (digits.startsWith("x"))
      {
        return Integer.parseInt(digits.substring(1), 16);
      }
      return Integer.parseInt(digits, 10);
    }
    catch (NumberFormatException ignored)
    {
      // not a number at all; the caller leaves the reference untouched.
      return 0;
    }
  }

  /**
   * Encodes the given string so that every character for which an entity is known is
   * replaced by its entity reference. All other characters are copied unchanged.
   * <p>
   * The lookup is done character by character, therefore only entities whose replacement
   * text is exactly one character long can ever be produced.
   *
   * @param value the original string.
   * @return the encoded string.
   */
  public String encodeEntities (final String value)
  {
    final int length = value.length();
    final StringBuffer encoded = new StringBuffer(length);
    for (int index = 0; index < length; index++)
    {
      final String character = String.valueOf(value.charAt(index));
      final String reference = lookupEntity(character);
      encoded.append(reference != null ? reference : character);
    }
    return encoded.toString();
  }

  /**
   * Decodes the string; all known entities are replaced by their resolved characters.
   * <p>
   * A reference starts at an '&amp;' and extends to the next ';'. Named references are
   * resolved through the entity table and their replacement text is decoded again, so
   * entity definitions may refer to other entities but must not be cyclic (a cyclic
   * definition would recurse without end). Numeric references (<code>&amp;#65;</code> or
   * <code>&amp;#x41;</code>) are resolved if they denote a code between 1 and 65535.
   * Everything that cannot be resolved is copied to the result unchanged, including its
   * terminating ';'. An '&amp;' that is not followed by a ';' anywhere in the remaining
   * text ends the decoding; the remaining text is copied verbatim.
   *
   * @param value the string that should be decoded.
   * @return the decoded string; the given instance itself if it contains nothing that
   *         looks like a reference.
   */
  public String decodeEntities (final String value)
  {
    StringBuffer decoded = null;
    int position = 0;
    int start = value.indexOf('&');
    while (start != -1)
    {
      final int end = value.indexOf(';', start);
      if (end == -1)
      {
        // an ampersand without terminator: nothing left to decode.
        break;
      }
      if (decoded == null)
      {
        decoded = new StringBuffer(value.length());
      }

      // copy the plain text between the previous reference and this one.
      decoded.append(value.substring(position, start));

      final String reference = value.substring(start + 1, end);
      final String resolved;
      if (reference.startsWith("#"))
      {
        final int code = parseCharacterCode(reference.substring(1));
        if (code >= 1 && code <= MAX_CHAR_CODE)
        {
          resolved = String.valueOf((char) code);
        }
        else
        {
          resolved = null;
        }
      }
      else
      {
        final String replacement = lookupCharacter(reference);
        resolved = (replacement == null) ? null : decodeEntities(replacement);
      }

      if (resolved != null)
      {
        decoded.append(resolved);
      }
      else
      {
        // invalid or unknown reference: keep it exactly as written, ';' included.
        decoded.append(value.substring(start, end + 1));
      }

      position = end + 1;
      start = value.indexOf('&', position);
    }

    if (decoded == null)
    {
      return value;
    }
    decoded.append(value.substring(position));
    return decoded.toString();
  }
}
488
489
490