|
1 |
| |
|
2 |
| |
|
3 |
| |
|
4 |
| |
|
5 |
| |
|
6 |
| |
|
7 |
| |
|
8 |
| |
|
9 |
| |
|
10 |
| |
|
11 |
| |
|
12 |
| |
|
13 |
| |
|
14 |
| |
|
15 |
| |
|
16 |
| |
|
17 |
| |
|
18 |
| |
|
19 |
| |
|
20 |
| |
|
21 |
| |
|
22 |
| |
|
23 |
| |
|
24 |
| |
|
25 |
| |
|
26 |
| |
|
27 |
| |
|
28 |
| |
|
29 |
| |
|
30 |
| |
|
31 |
| |
|
32 |
| |
|
33 |
| |
|
34 |
| |
|
35 |
| |
|
36 |
| |
|
37 |
| |
|
38 |
| |
|
39 |
| |
|
40 |
| |
|
41 |
| |
|
42 |
| |
|
43 |
| |
|
44 |
| |
|
45 |
| |
|
46 |
| |
|
47 |
| |
|
48 |
| |
|
49 |
| |
|
50 |
| |
|
51 |
| |
|
52 |
| |
|
53 |
| |
|
54 |
| package org.w3c.tidy; |
|
55 |
| |
|
56 |
| |
|
57 |
| |
|
58 |
| |
|
59 |
| |
|
60 |
| |
|
61 |
| |
|
62 |
| |
|
63 |
| |
|
64 |
| |
|
65 |
| |
|
66 |
| |
|
67 |
| |
|
68 |
| |
|
69 |
| |
|
70 |
| |
|
71 |
| |
|
72 |
| |
|
73 |
| |
|
74 |
| |
|
75 |
| |
|
76 |
| |
|
77 |
| |
|
78 |
| |
|
79 |
| |
|
80 |
| |
|
81 |
| |
|
82 |
| |
|
83 |
| |
|
84 |
| |
|
85 |
| public class Clean |
|
86 |
| { |
|
87 |
| |
|
88 |
| |
|
89 |
| |
|
90 |
| |
|
91 |
| private int classNum = 1; |
|
92 |
| |
|
93 |
| |
|
94 |
| |
|
95 |
| |
|
96 |
| private TagTable tt; |
|
97 |
| |
|
98 |
| |
|
99 |
| |
|
100 |
| |
|
101 |
| |
|
102 |
219
| public Clean(TagTable tagTable)
|
|
103 |
| { |
|
104 |
219
| this.tt = tagTable;
|
|
105 |
| } |
|
106 |
| |
|
107 |
| |
|
108 |
| |
|
109 |
| |
|
110 |
| |
|
111 |
| |
|
112 |
| |
|
113 |
| |
|
114 |
2
| private StyleProp insertProperty(StyleProp props, String name, String value)
|
|
115 |
| { |
|
116 |
2
| StyleProp first, prev, prop;
|
|
117 |
2
| int cmp;
|
|
118 |
| |
|
119 |
2
| prev = null;
|
|
120 |
2
| first = props;
|
|
121 |
| |
|
122 |
2
| while (props != null)
|
|
123 |
| { |
|
124 |
1
| cmp = props.name.compareTo(name);
|
|
125 |
| |
|
126 |
1
| if (cmp == 0)
|
|
127 |
| { |
|
128 |
| |
|
129 |
0
| return first;
|
|
130 |
| } |
|
131 |
| |
|
132 |
1
| if (cmp > 0)
|
|
133 |
| { |
|
134 |
| |
|
135 |
| |
|
136 |
1
| prop = new StyleProp(name, value, props);
|
|
137 |
| |
|
138 |
1
| if (prev != null)
|
|
139 |
| { |
|
140 |
0
| prev.next = prop;
|
|
141 |
| } |
|
142 |
| else |
|
143 |
| { |
|
144 |
1
| first = prop;
|
|
145 |
| } |
|
146 |
| |
|
147 |
1
| return first;
|
|
148 |
| } |
|
149 |
| |
|
150 |
0
| prev = props;
|
|
151 |
0
| props = props.next;
|
|
152 |
| } |
|
153 |
| |
|
154 |
1
| prop = new StyleProp(name, value, null);
|
|
155 |
| |
|
156 |
1
| if (prev != null)
|
|
157 |
| { |
|
158 |
0
| prev.next = prop;
|
|
159 |
| } |
|
160 |
| else |
|
161 |
| { |
|
162 |
1
| first = prop;
|
|
163 |
| } |
|
164 |
| |
|
165 |
1
| return first;
|
|
166 |
| } |
|
167 |
| |
|
168 |
| |
|
169 |
| |
|
170 |
| |
|
171 |
| |
|
172 |
| |
|
173 |
| |
|
174 |
2
| private StyleProp createProps(StyleProp prop, String style)
|
|
175 |
| { |
|
176 |
2
| int nameEnd;
|
|
177 |
2
| int valueEnd;
|
|
178 |
2
| int valueStart = 0;
|
|
179 |
2
| int nameStart = 0;
|
|
180 |
2
| boolean more;
|
|
181 |
| |
|
182 |
2
| nameStart = 0;
|
|
183 |
2
| while (nameStart < style.length())
|
|
184 |
| { |
|
185 |
2
| while (nameStart < style.length() && style.charAt(nameStart) == ' ')
|
|
186 |
| { |
|
187 |
0
| ++nameStart;
|
|
188 |
| } |
|
189 |
| |
|
190 |
2
| nameEnd = nameStart;
|
|
191 |
| |
|
192 |
22
| while (nameEnd < style.length())
|
|
193 |
| { |
|
194 |
22
| if (style.charAt(nameEnd) == ':')
|
|
195 |
| { |
|
196 |
2
| valueStart = nameEnd + 1;
|
|
197 |
2
| break;
|
|
198 |
| } |
|
199 |
| |
|
200 |
20
| ++nameEnd;
|
|
201 |
| } |
|
202 |
| |
|
203 |
2
| if (nameEnd >= style.length() || style.charAt(nameEnd) != ':')
|
|
204 |
| { |
|
205 |
0
| break;
|
|
206 |
| } |
|
207 |
| |
|
208 |
2
| while (valueStart < style.length() && style.charAt(valueStart) == ' ')
|
|
209 |
| { |
|
210 |
2
| ++valueStart;
|
|
211 |
| } |
|
212 |
| |
|
213 |
2
| valueEnd = valueStart;
|
|
214 |
2
| more = false;
|
|
215 |
| |
|
216 |
2
| while (valueEnd < style.length())
|
|
217 |
| { |
|
218 |
26
| if (style.charAt(valueEnd) == ';')
|
|
219 |
| { |
|
220 |
0
| more = true;
|
|
221 |
0
| break;
|
|
222 |
| } |
|
223 |
| |
|
224 |
26
| ++valueEnd;
|
|
225 |
| } |
|
226 |
| |
|
227 |
2
| prop = insertProperty(prop, style.substring(nameStart, nameEnd), style.substring(valueStart, valueEnd));
|
|
228 |
| |
|
229 |
2
| if (more)
|
|
230 |
| { |
|
231 |
0
| nameStart = valueEnd + 1;
|
|
232 |
0
| continue;
|
|
233 |
| } |
|
234 |
| |
|
235 |
2
| break;
|
|
236 |
| } |
|
237 |
| |
|
238 |
2
| return prop;
|
|
239 |
| } |
|
240 |
| |
|
241 |
| |
|
242 |
| |
|
243 |
| |
|
244 |
| |
|
245 |
| |
|
246 |
1
| private String createPropString(StyleProp props)
|
|
247 |
| { |
|
248 |
1
| String style = "";
|
|
249 |
1
| int len;
|
|
250 |
1
| StyleProp prop;
|
|
251 |
| |
|
252 |
| |
|
253 |
1
| for (len = 0, prop = props; prop != null; prop = prop.next)
|
|
254 |
| { |
|
255 |
2
| len += prop.name.length() + 2;
|
|
256 |
2
| len += prop.value.length() + 2;
|
|
257 |
| } |
|
258 |
| |
|
259 |
2
| for (prop = props; prop != null; prop = prop.next)
|
|
260 |
| { |
|
261 |
2
| style = style.concat(prop.name);
|
|
262 |
2
| style = style.concat(": ");
|
|
263 |
| |
|
264 |
2
| style = style.concat(prop.value);
|
|
265 |
| |
|
266 |
2
| if (prop.next == null)
|
|
267 |
| { |
|
268 |
1
| break;
|
|
269 |
| } |
|
270 |
| |
|
271 |
1
| style = style.concat("; ");
|
|
272 |
| } |
|
273 |
| |
|
274 |
1
| return style;
|
|
275 |
| } |
|
276 |
| |
|
277 |
| |
|
278 |
| |
|
279 |
| |
|
280 |
| |
|
281 |
| |
|
282 |
| |
|
283 |
1
| private String addProperty(String style, String property)
|
|
284 |
| { |
|
285 |
1
| StyleProp prop;
|
|
286 |
| |
|
287 |
1
| prop = createProps(null, style);
|
|
288 |
1
| prop = createProps(prop, property);
|
|
289 |
1
| style = createPropString(prop);
|
|
290 |
1
| return style;
|
|
291 |
| } |
|
292 |
| |
|
293 |
| |
|
294 |
| |
|
295 |
| |
|
296 |
| |
|
297 |
| |
|
298 |
| |
|
299 |
1
| private String gensymClass(Lexer lexer, String tag)
|
|
300 |
| { |
|
301 |
1
| String str;
|
|
302 |
| |
|
303 |
1
| str = lexer.configuration.cssPrefix == null ? lexer.configuration.cssPrefix + this.classNum : "c"
|
|
304 |
| + this.classNum; |
|
305 |
1
| this.classNum++;
|
|
306 |
1
| return str;
|
|
307 |
| } |
|
308 |
| |
|
309 |
| |
|
310 |
| |
|
311 |
| |
|
312 |
| |
|
313 |
| |
|
314 |
| |
|
315 |
| |
|
316 |
1
| private String findStyle(Lexer lexer, String tag, String properties)
|
|
317 |
| { |
|
318 |
1
| Style style;
|
|
319 |
| |
|
320 |
1
| for (style = lexer.styles; style != null; style = style.next)
|
|
321 |
| { |
|
322 |
0
| if (style.tag.equals(tag) && style.properties.equals(properties))
|
|
323 |
| { |
|
324 |
0
| return style.tagClass;
|
|
325 |
| } |
|
326 |
| } |
|
327 |
| |
|
328 |
1
| style = new Style(tag, gensymClass(lexer, tag), properties, lexer.styles);
|
|
329 |
1
| lexer.styles = style;
|
|
330 |
1
| return style.tagClass;
|
|
331 |
| } |
|
332 |
| |
|
333 |
| |
|
334 |
| |
|
335 |
| |
|
336 |
| |
|
337 |
| |
|
338 |
| |
|
339 |
14
| private void style2Rule(Lexer lexer, Node node)
|
|
340 |
| { |
|
341 |
14
| AttVal styleattr, classattr;
|
|
342 |
14
| String classname;
|
|
343 |
| |
|
344 |
14
| styleattr = node.getAttrByName("style");
|
|
345 |
| |
|
346 |
14
| if (styleattr != null)
|
|
347 |
| { |
|
348 |
1
| classname = findStyle(lexer, node.element, styleattr.value);
|
|
349 |
1
| classattr = node.getAttrByName("class");
|
|
350 |
| |
|
351 |
| |
|
352 |
| |
|
353 |
1
| if (classattr != null)
|
|
354 |
| { |
|
355 |
0
| classattr.value = classattr.value + " " + classname;
|
|
356 |
0
| node.removeAttribute(styleattr);
|
|
357 |
| } |
|
358 |
| else |
|
359 |
| { |
|
360 |
| |
|
361 |
1
| styleattr.attribute = "class";
|
|
362 |
1
| styleattr.value = classname;
|
|
363 |
| } |
|
364 |
| } |
|
365 |
| } |
|
366 |
| |
|
367 |
| |
|
368 |
| |
|
369 |
| |
|
370 |
| |
|
371 |
| |
|
372 |
| |
|
373 |
0
| private void addColorRule(Lexer lexer, String selector, String color)
|
|
374 |
| { |
|
375 |
0
| if (color != null)
|
|
376 |
| { |
|
377 |
0
| lexer.addStringLiteral(selector);
|
|
378 |
0
| lexer.addStringLiteral(" { color: ");
|
|
379 |
0
| lexer.addStringLiteral(color);
|
|
380 |
0
| lexer.addStringLiteral(" }\n");
|
|
381 |
| } |
|
382 |
| } |
|
383 |
| |
|
384 |
| |
|
385 |
| |
|
386 |
| |
|
387 |
| |
|
388 |
| |
|
389 |
| |
|
390 |
| |
|
391 |
| |
|
392 |
| |
|
393 |
| |
|
394 |
| |
|
395 |
| |
|
396 |
| |
|
397 |
| |
|
398 |
| |
|
399 |
1
| private void cleanBodyAttrs(Lexer lexer, Node body)
|
|
400 |
| { |
|
401 |
1
| AttVal attr;
|
|
402 |
1
| String bgurl = null;
|
|
403 |
1
| String bgcolor = null;
|
|
404 |
1
| String color = null;
|
|
405 |
| |
|
406 |
1
| attr = body.getAttrByName("background");
|
|
407 |
| |
|
408 |
1
| if (attr != null)
|
|
409 |
| { |
|
410 |
0
| bgurl = attr.value;
|
|
411 |
0
| attr.value = null;
|
|
412 |
0
| body.removeAttribute(attr);
|
|
413 |
| } |
|
414 |
| |
|
415 |
1
| attr = body.getAttrByName("bgcolor");
|
|
416 |
| |
|
417 |
1
| if (attr != null)
|
|
418 |
| { |
|
419 |
0
| bgcolor = attr.value;
|
|
420 |
0
| attr.value = null;
|
|
421 |
0
| body.removeAttribute(attr);
|
|
422 |
| } |
|
423 |
| |
|
424 |
1
| attr = body.getAttrByName("text");
|
|
425 |
| |
|
426 |
1
| if (attr != null)
|
|
427 |
| { |
|
428 |
1
| color = attr.value;
|
|
429 |
1
| attr.value = null;
|
|
430 |
1
| body.removeAttribute(attr);
|
|
431 |
| } |
|
432 |
| |
|
433 |
1
| if (bgurl != null || bgcolor != null || color != null)
|
|
434 |
| { |
|
435 |
1
| lexer.addStringLiteral(" body {\n");
|
|
436 |
| |
|
437 |
1
| if (bgurl != null)
|
|
438 |
| { |
|
439 |
0
| lexer.addStringLiteral(" background-image: url(");
|
|
440 |
0
| lexer.addStringLiteral(bgurl);
|
|
441 |
0
| lexer.addStringLiteral(");\n");
|
|
442 |
| } |
|
443 |
| |
|
444 |
1
| if (bgcolor != null)
|
|
445 |
| { |
|
446 |
0
| lexer.addStringLiteral(" background-color: ");
|
|
447 |
0
| lexer.addStringLiteral(bgcolor);
|
|
448 |
0
| lexer.addStringLiteral(";\n");
|
|
449 |
| } |
|
450 |
| |
|
451 |
1
| if (color != null)
|
|
452 |
| { |
|
453 |
1
| lexer.addStringLiteral(" color: ");
|
|
454 |
1
| lexer.addStringLiteral(color);
|
|
455 |
1
| lexer.addStringLiteral(";\n");
|
|
456 |
| } |
|
457 |
| |
|
458 |
1
| lexer.addStringLiteral(" }\n");
|
|
459 |
| } |
|
460 |
| |
|
461 |
1
| attr = body.getAttrByName("link");
|
|
462 |
| |
|
463 |
1
| if (attr != null)
|
|
464 |
| { |
|
465 |
0
| addColorRule(lexer, " :link", attr.value);
|
|
466 |
0
| body.removeAttribute(attr);
|
|
467 |
| } |
|
468 |
| |
|
469 |
1
| attr = body.getAttrByName("vlink");
|
|
470 |
| |
|
471 |
1
| if (attr != null)
|
|
472 |
| { |
|
473 |
0
| addColorRule(lexer, " :visited", attr.value);
|
|
474 |
0
| body.removeAttribute(attr);
|
|
475 |
| } |
|
476 |
| |
|
477 |
1
| attr = body.getAttrByName("alink");
|
|
478 |
| |
|
479 |
1
| if (attr != null)
|
|
480 |
| { |
|
481 |
0
| addColorRule(lexer, " :active", attr.value);
|
|
482 |
0
| body.removeAttribute(attr);
|
|
483 |
| } |
|
484 |
| } |
|
485 |
| |
|
486 |
| |
|
487 |
| |
|
488 |
| |
|
489 |
| |
|
490 |
| |
|
491 |
| |
|
492 |
0
| private boolean niceBody(Lexer lexer, Node doc)
|
|
493 |
| { |
|
494 |
0
| Node body = doc.findBody(lexer.configuration.tt);
|
|
495 |
| |
|
496 |
0
| if (body != null)
|
|
497 |
| { |
|
498 |
0
| if (body.getAttrByName("background") != null
|
|
499 |
| || body.getAttrByName("bgcolor") != null |
|
500 |
| || body.getAttrByName("text") != null |
|
501 |
| || body.getAttrByName("link") != null |
|
502 |
| || body.getAttrByName("vlink") != null |
|
503 |
| || body.getAttrByName("alink") != null) |
|
504 |
| { |
|
505 |
0
| lexer.badLayout |= Report.USING_BODY;
|
|
506 |
0
| return false;
|
|
507 |
| } |
|
508 |
| } |
|
509 |
| |
|
510 |
0
| return true;
|
|
511 |
| } |
|
512 |
| |
|
513 |
| |
|
514 |
| |
|
515 |
| |
|
516 |
| |
|
517 |
| |
|
518 |
1
| private void createStyleElement(Lexer lexer, Node doc)
|
|
519 |
| { |
|
520 |
1
| Node node, head, body;
|
|
521 |
1
| Style style;
|
|
522 |
1
| AttVal av;
|
|
523 |
| |
|
524 |
1
| if (lexer.styles == null && niceBody(lexer, doc))
|
|
525 |
| { |
|
526 |
0
| return;
|
|
527 |
| } |
|
528 |
| |
|
529 |
1
| node = lexer.newNode(Node.START_TAG, null, 0, 0, "style");
|
|
530 |
1
| node.implicit = true;
|
|
531 |
| |
|
532 |
| |
|
533 |
1
| av = new AttVal(null, null, '"', "type", "text/css");
|
|
534 |
1
| av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
|
|
535 |
1
| node.attributes = av;
|
|
536 |
| |
|
537 |
1
| body = doc.findBody(lexer.configuration.tt);
|
|
538 |
| |
|
539 |
1
| lexer.txtstart = lexer.lexsize;
|
|
540 |
| |
|
541 |
1
| if (body != null)
|
|
542 |
| { |
|
543 |
1
| cleanBodyAttrs(lexer, body);
|
|
544 |
| } |
|
545 |
| |
|
546 |
1
| for (style = lexer.styles; style != null; style = style.next)
|
|
547 |
| { |
|
548 |
1
| lexer.addCharToLexer(' ');
|
|
549 |
1
| lexer.addStringLiteral(style.tag);
|
|
550 |
1
| lexer.addCharToLexer('.');
|
|
551 |
1
| lexer.addStringLiteral(style.tagClass);
|
|
552 |
1
| lexer.addCharToLexer(' ');
|
|
553 |
1
| lexer.addCharToLexer('{');
|
|
554 |
1
| lexer.addStringLiteral(style.properties);
|
|
555 |
1
| lexer.addCharToLexer('}');
|
|
556 |
1
| lexer.addCharToLexer('\n');
|
|
557 |
| } |
|
558 |
| |
|
559 |
1
| lexer.txtend = lexer.lexsize;
|
|
560 |
| |
|
561 |
1
| node.insertNodeAtEnd(lexer.newNode(Node.TEXT_NODE, lexer.lexbuf, lexer.txtstart, lexer.txtend));
|
|
562 |
| |
|
563 |
| |
|
564 |
| |
|
565 |
| |
|
566 |
1
| head = doc.findHEAD(lexer.configuration.tt);
|
|
567 |
| |
|
568 |
1
| if (head != null)
|
|
569 |
| { |
|
570 |
1
| head.insertNodeAtEnd(node);
|
|
571 |
| } |
|
572 |
| } |
|
573 |
| |
|
574 |
| |
|
575 |
| |
|
576 |
| |
|
577 |
| |
|
578 |
0
| private void fixNodeLinks(Node node)
|
|
579 |
| { |
|
580 |
0
| Node child;
|
|
581 |
| |
|
582 |
0
| if (node.prev != null)
|
|
583 |
| { |
|
584 |
0
| node.prev.next = node;
|
|
585 |
| } |
|
586 |
| else |
|
587 |
| { |
|
588 |
0
| node.parent.content = node;
|
|
589 |
| } |
|
590 |
| |
|
591 |
0
| if (node.next != null)
|
|
592 |
| { |
|
593 |
0
| node.next.prev = node;
|
|
594 |
| } |
|
595 |
| else |
|
596 |
| { |
|
597 |
0
| node.parent.last = node;
|
|
598 |
| } |
|
599 |
| |
|
600 |
0
| for (child = node.content; child != null; child = child.next)
|
|
601 |
| { |
|
602 |
0
| child.parent = node;
|
|
603 |
| } |
|
604 |
| } |
|
605 |
| |
|
606 |
| |
|
607 |
| |
|
608 |
| |
|
609 |
| |
|
610 |
6
| private void stripOnlyChild(Node node)
|
|
611 |
| { |
|
612 |
6
| Node child;
|
|
613 |
| |
|
614 |
6
| child = node.content;
|
|
615 |
6
| node.content = child.content;
|
|
616 |
6
| node.last = child.last;
|
|
617 |
6
| child.content = null;
|
|
618 |
| |
|
619 |
6
| for (child = node.content; child != null; child = child.next)
|
|
620 |
| { |
|
621 |
7
| child.parent = node;
|
|
622 |
| } |
|
623 |
| } |
|
624 |
| |
|
625 |
| |
|
626 |
| |
|
627 |
| |
|
628 |
| |
|
629 |
| |
|
630 |
| |
|
631 |
3
| private void discardContainer(Node element, Node[] pnode)
|
|
632 |
| { |
|
633 |
3
| Node node;
|
|
634 |
3
| Node parent = element.parent;
|
|
635 |
| |
|
636 |
3
| if (element.content != null)
|
|
637 |
| { |
|
638 |
3
| element.last.next = element.next;
|
|
639 |
| |
|
640 |
3
| if (element.next != null)
|
|
641 |
| { |
|
642 |
1
| element.next.prev = element.last;
|
|
643 |
1
| element.last.next = element.next;
|
|
644 |
| } |
|
645 |
| else |
|
646 |
| { |
|
647 |
2
| parent.last = element.last;
|
|
648 |
| } |
|
649 |
| |
|
650 |
3
| if (element.prev != null)
|
|
651 |
| { |
|
652 |
0
| element.content.prev = element.prev;
|
|
653 |
0
| element.prev.next = element.content;
|
|
654 |
| } |
|
655 |
| else |
|
656 |
| { |
|
657 |
3
| parent.content = element.content;
|
|
658 |
| } |
|
659 |
| |
|
660 |
3
| for (node = element.content; node != null; node = node.next)
|
|
661 |
| { |
|
662 |
6
| node.parent = parent;
|
|
663 |
| } |
|
664 |
| |
|
665 |
3
| pnode[0] = element.content;
|
|
666 |
| } |
|
667 |
| else |
|
668 |
| { |
|
669 |
0
| if (element.next != null)
|
|
670 |
| { |
|
671 |
0
| element.next.prev = element.prev;
|
|
672 |
| } |
|
673 |
| else |
|
674 |
| { |
|
675 |
0
| parent.last = element.prev;
|
|
676 |
| } |
|
677 |
| |
|
678 |
0
| if (element.prev != null)
|
|
679 |
| { |
|
680 |
0
| element.prev.next = element.next;
|
|
681 |
| } |
|
682 |
| else |
|
683 |
| { |
|
684 |
0
| parent.content = element.next;
|
|
685 |
| } |
|
686 |
| |
|
687 |
0
| pnode[0] = element.next;
|
|
688 |
| } |
|
689 |
| |
|
690 |
3
| element.next = null;
|
|
691 |
3
| element.content = null;
|
|
692 |
| } |
|
693 |
| |
|
694 |
| |
|
695 |
| |
|
696 |
| |
|
697 |
| |
|
698 |
| |
|
699 |
5
| private void addStyleProperty(Node node, String property)
|
|
700 |
| { |
|
701 |
5
| AttVal av;
|
|
702 |
| |
|
703 |
5
| for (av = node.attributes; av != null; av = av.next)
|
|
704 |
| { |
|
705 |
4
| if (av.attribute.equals("style"))
|
|
706 |
| { |
|
707 |
1
| break;
|
|
708 |
| } |
|
709 |
| } |
|
710 |
| |
|
711 |
| |
|
712 |
| |
|
713 |
5
| if (av != null)
|
|
714 |
| { |
|
715 |
1
| String s;
|
|
716 |
| |
|
717 |
1
| s = addProperty(av.value, property);
|
|
718 |
1
| av.value = s;
|
|
719 |
| } |
|
720 |
| else |
|
721 |
| { |
|
722 |
| |
|
723 |
4
| av = new AttVal(node.attributes, null, '"', "style", property);
|
|
724 |
4
| av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
|
|
725 |
4
| node.attributes = av;
|
|
726 |
| } |
|
727 |
| } |
|
728 |
| |
|
729 |
| |
|
730 |
| |
|
731 |
| |
|
732 |
| |
|
733 |
| |
|
734 |
| |
|
735 |
| |
|
736 |
| |
|
737 |
0
| private String mergeProperties(String s1, String s2)
|
|
738 |
| { |
|
739 |
0
| String s;
|
|
740 |
0
| StyleProp prop;
|
|
741 |
| |
|
742 |
0
| prop = createProps(null, s1);
|
|
743 |
0
| prop = createProps(prop, s2);
|
|
744 |
0
| s = createPropString(prop);
|
|
745 |
0
| return s;
|
|
746 |
| } |
|
747 |
| |
|
748 |
| |
|
749 |
| |
|
750 |
| |
|
751 |
| |
|
752 |
| |
|
753 |
3
| private void mergeClasses(Node node, Node child)
|
|
754 |
| { |
|
755 |
3
| AttVal av;
|
|
756 |
3
| String s1, s2, names;
|
|
757 |
| |
|
758 |
3
| for (s2 = null, av = child.attributes; av != null; av = av.next)
|
|
759 |
| { |
|
760 |
1
| if ("class".equals(av.attribute))
|
|
761 |
| { |
|
762 |
1
| s2 = av.value;
|
|
763 |
1
| break;
|
|
764 |
| } |
|
765 |
| } |
|
766 |
| |
|
767 |
3
| for (s1 = null, av = node.attributes; av != null; av = av.next)
|
|
768 |
| { |
|
769 |
1
| if ("class".equals(av.attribute))
|
|
770 |
| { |
|
771 |
0
| s1 = av.value;
|
|
772 |
0
| break;
|
|
773 |
| } |
|
774 |
| } |
|
775 |
| |
|
776 |
3
| if (s1 != null)
|
|
777 |
| { |
|
778 |
0
| if (s2 != null)
|
|
779 |
| { |
|
780 |
0
| names = s1 + ' ' + s2;
|
|
781 |
0
| av.value = names;
|
|
782 |
| } |
|
783 |
| } |
|
784 |
3
| else if (s2 != null)
|
|
785 |
| { |
|
786 |
1
| av = new AttVal(node.attributes, null, '"', "class", s2);
|
|
787 |
1
| av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
|
|
788 |
1
| node.attributes = av;
|
|
789 |
| } |
|
790 |
| } |
|
791 |
| |
|
792 |
| |
|
793 |
| |
|
794 |
| |
|
795 |
| |
|
796 |
| |
|
797 |
3
| private void mergeStyles(Node node, Node child)
|
|
798 |
| { |
|
799 |
3
| AttVal av;
|
|
800 |
3
| String s1, s2, style;
|
|
801 |
| |
|
802 |
| |
|
803 |
| |
|
804 |
3
| mergeClasses(node, child);
|
|
805 |
| |
|
806 |
3
| for (s2 = null, av = child.attributes; av != null; av = av.next)
|
|
807 |
| { |
|
808 |
1
| if (av.attribute.equals("style"))
|
|
809 |
| { |
|
810 |
0
| s2 = av.value;
|
|
811 |
0
| break;
|
|
812 |
| } |
|
813 |
| } |
|
814 |
| |
|
815 |
3
| for (s1 = null, av = node.attributes; av != null; av = av.next)
|
|
816 |
| { |
|
817 |
2
| if (av.attribute.equals("style"))
|
|
818 |
| { |
|
819 |
0
| s1 = av.value;
|
|
820 |
0
| break;
|
|
821 |
| } |
|
822 |
| } |
|
823 |
| |
|
824 |
3
| if (s1 != null)
|
|
825 |
| { |
|
826 |
0
| if (s2 != null)
|
|
827 |
| { |
|
828 |
0
| style = mergeProperties(s1, s2);
|
|
829 |
0
| av.value = style;
|
|
830 |
| } |
|
831 |
| } |
|
832 |
3
| else if (s2 != null)
|
|
833 |
| { |
|
834 |
0
| av = new AttVal(node.attributes, null, '"', "style", s2);
|
|
835 |
0
| av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
|
|
836 |
0
| node.attributes = av;
|
|
837 |
| } |
|
838 |
| } |
|
839 |
| |
|
840 |
| |
|
841 |
| |
|
842 |
| |
|
843 |
| |
|
844 |
| |
|
845 |
2
| private String fontSize2Name(String size)
|
|
846 |
| { |
|
847 |
2
| String[] sizes = {"60%", "70%", "80%", null, "120%", "150%", "200%"};
|
|
848 |
2
| String buf;
|
|
849 |
| |
|
850 |
2
| if (size.length() > 0 && '0' <= size.charAt(0) && size.charAt(0) <= '6')
|
|
851 |
| { |
|
852 |
0
| int n = size.charAt(0) - '0';
|
|
853 |
0
| return sizes[n];
|
|
854 |
| } |
|
855 |
| |
|
856 |
2
| if (size.length() > 0 && size.charAt(0) == '-')
|
|
857 |
| { |
|
858 |
2
| if (size.length() > 1 && '0' <= size.charAt(1) && size.charAt(1) <= '6')
|
|
859 |
| { |
|
860 |
2
| int n = size.charAt(1) - '0';
|
|
861 |
2
| double x;
|
|
862 |
| |
|
863 |
2
| for (x = 1.0; n > 0; --n)
|
|
864 |
| { |
|
865 |
4
| x *= 0.8;
|
|
866 |
| } |
|
867 |
| |
|
868 |
2
| x *= 100.0;
|
|
869 |
2
| buf = "" + (int) x + "%";
|
|
870 |
| |
|
871 |
2
| return buf;
|
|
872 |
| } |
|
873 |
| |
|
874 |
0
| return "smaller";
|
|
875 |
| } |
|
876 |
| |
|
877 |
0
| if (size.length() > 1 && '0' <= size.charAt(1) && size.charAt(1) <= '6')
|
|
878 |
| { |
|
879 |
0
| int n = size.charAt(1) - '0';
|
|
880 |
0
| double x;
|
|
881 |
| |
|
882 |
0
| for (x = 1.0; n > 0; --n)
|
|
883 |
| { |
|
884 |
0
| x *= 1.2;
|
|
885 |
| } |
|
886 |
| |
|
887 |
0
| x *= 100.0;
|
|
888 |
0
| buf = "" + (int) x + "%";
|
|
889 |
| |
|
890 |
0
| return buf;
|
|
891 |
| } |
|
892 |
| |
|
893 |
0
| return "larger";
|
|
894 |
| } |
|
895 |
| |
|
896 |
| |
|
897 |
| |
|
898 |
| |
|
899 |
| |
|
900 |
| |
|
901 |
1
| private void addFontFace(Node node, String face)
|
|
902 |
| { |
|
903 |
1
| addStyleProperty(node, "font-family: " + face);
|
|
904 |
| } |
|
905 |
| |
|
906 |
| |
|
907 |
| |
|
908 |
| |
|
909 |
| |
|
910 |
| |
|
911 |
2
| private void addFontSize(Node node, String size)
|
|
912 |
| { |
|
913 |
2
| if (size == null)
|
|
914 |
| { |
|
915 |
0
| return;
|
|
916 |
| } |
|
917 |
| |
|
918 |
2
| if ("6".equals(size) && node.tag == this.tt.tagP)
|
|
919 |
| { |
|
920 |
0
| node.element = "h1";
|
|
921 |
0
| this.tt.findTag(node);
|
|
922 |
0
| return;
|
|
923 |
| } |
|
924 |
| |
|
925 |
2
| if ("5".equals(size) && node.tag == this.tt.tagP)
|
|
926 |
| { |
|
927 |
0
| node.element = "h2";
|
|
928 |
0
| this.tt.findTag(node);
|
|
929 |
0
| return;
|
|
930 |
| } |
|
931 |
| |
|
932 |
2
| if ("4".equals(size) && node.tag == this.tt.tagP)
|
|
933 |
| { |
|
934 |
0
| node.element = "h3";
|
|
935 |
0
| this.tt.findTag(node);
|
|
936 |
0
| return;
|
|
937 |
| } |
|
938 |
| |
|
939 |
2
| String value = fontSize2Name(size);
|
|
940 |
| |
|
941 |
2
| if (value != null)
|
|
942 |
| { |
|
943 |
2
| addStyleProperty(node, "font-size: " + value);
|
|
944 |
| } |
|
945 |
| } |
|
946 |
| |
|
947 |
| |
|
948 |
| |
|
949 |
| |
|
950 |
| |
|
951 |
| |
|
952 |
0
| private void addFontColor(Node node, String color)
|
|
953 |
| { |
|
954 |
0
| addStyleProperty(node, "color: " + color);
|
|
955 |
| } |
|
956 |
| |
|
957 |
| |
|
958 |
| |
|
959 |
| |
|
960 |
| |
|
961 |
| |
|
962 |
0
| private void addAlign(Node node, String align)
|
|
963 |
| { |
|
964 |
| |
|
965 |
0
| addStyleProperty(node, "text-align: " + align.toLowerCase());
|
|
966 |
| } |
|
967 |
| |
|
968 |
| |
|
969 |
| |
|
970 |
| |
|
971 |
| |
|
972 |
| |
|
973 |
2
| private void addFontStyles(Node node, AttVal av)
|
|
974 |
| { |
|
975 |
2
| while (av != null)
|
|
976 |
| { |
|
977 |
3
| if (av.attribute.equals("face"))
|
|
978 |
| { |
|
979 |
1
| addFontFace(node, av.value);
|
|
980 |
| } |
|
981 |
2
| else if (av.attribute.equals("size"))
|
|
982 |
| { |
|
983 |
2
| addFontSize(node, av.value);
|
|
984 |
| } |
|
985 |
0
| else if (av.attribute.equals("color"))
|
|
986 |
| { |
|
987 |
0
| addFontColor(node, av.value);
|
|
988 |
| } |
|
989 |
| |
|
990 |
3
| av = av.next;
|
|
991 |
| } |
|
992 |
| } |
|
993 |
| |
|
994 |
| |
|
995 |
| |
|
996 |
| |
|
997 |
| |
|
998 |
| |
|
999 |
40
| private void textAlign(Lexer lexer, Node node)
|
|
1000 |
| { |
|
1001 |
40
| AttVal av, prev;
|
|
1002 |
| |
|
1003 |
40
| prev = null;
|
|
1004 |
| |
|
1005 |
40
| for (av = node.attributes; av != null; av = av.next)
|
|
1006 |
| { |
|
1007 |
17
| if (av.attribute.equals("align"))
|
|
1008 |
| { |
|
1009 |
0
| if (prev != null)
|
|
1010 |
| { |
|
1011 |
0
| prev.next = av.next;
|
|
1012 |
| } |
|
1013 |
| else |
|
1014 |
| { |
|
1015 |
0
| node.attributes = av.next;
|
|
1016 |
| } |
|
1017 |
| |
|
1018 |
0
| if (av.value != null)
|
|
1019 |
| { |
|
1020 |
0
| addAlign(node, av.value);
|
|
1021 |
| } |
|
1022 |
| |
|
1023 |
0
| break;
|
|
1024 |
| } |
|
1025 |
| |
|
1026 |
17
| prev = av;
|
|
1027 |
| } |
|
1028 |
| } |
|
1029 |
| |
|
1030 |
| |
|
1031 |
| |
|
1032 |
| |
|
1033 |
| |
|
1034 |
| |
|
1035 |
| |
|
1036 |
| |
|
1037 |
| |
|
1038 |
673
| private boolean dir2Div(Lexer lexer, Node node)
|
|
1039 |
| { |
|
1040 |
673
| Node child;
|
|
1041 |
| |
|
1042 |
673
| if (node.tag == this.tt.tagDir || node.tag == this.tt.tagUl || node.tag == this.tt.tagOl)
|
|
1043 |
| { |
|
1044 |
1
| child = node.content;
|
|
1045 |
| |
|
1046 |
1
| if (child == null)
|
|
1047 |
| { |
|
1048 |
0
| return false;
|
|
1049 |
| } |
|
1050 |
| |
|
1051 |
| |
|
1052 |
1
| if (child.next != null)
|
|
1053 |
| { |
|
1054 |
1
| return false;
|
|
1055 |
| } |
|
1056 |
| |
|
1057 |
0
| if (child.tag != this.tt.tagLi)
|
|
1058 |
| { |
|
1059 |
0
| return false;
|
|
1060 |
| } |
|
1061 |
| |
|
1062 |
0
| if (!child.implicit)
|
|
1063 |
| { |
|
1064 |
0
| return false;
|
|
1065 |
| } |
|
1066 |
| |
|
1067 |
| |
|
1068 |
0
| node.tag = this.tt.tagDiv;
|
|
1069 |
0
| node.element = "div";
|
|
1070 |
0
| addStyleProperty(node, "margin-left: 2em");
|
|
1071 |
0
| stripOnlyChild(node);
|
|
1072 |
0
| return true;
|
|
1073 |
| } |
|
1074 |
| |
|
1075 |
672
| return false;
|
|
1076 |
| } |
|
1077 |
| |
|
1078 |
| |
|
1079 |
| |
|
1080 |
| |
|
1081 |
| |
|
1082 |
| |
|
1083 |
| |
|
1084 |
| |
|
1085 |
| |
|
1086 |
| |
|
1087 |
| |
|
1088 |
| |
|
1089 |
| |
|
1090 |
| |
|
1091 |
| |
|
1092 |
673
| private boolean center2Div(Lexer lexer, Node node, Node[] pnode)
|
|
1093 |
| { |
|
1094 |
673
| if (node.tag == this.tt.tagCenter)
|
|
1095 |
| { |
|
1096 |
0
| if (lexer.configuration.dropFontTags)
|
|
1097 |
| { |
|
1098 |
0
| if (node.content != null)
|
|
1099 |
| { |
|
1100 |
0
| Node last = node.last;
|
|
1101 |
0
| Node parent = node.parent;
|
|
1102 |
| |
|
1103 |
0
| discardContainer(node, pnode);
|
|
1104 |
| |
|
1105 |
0
| node = lexer.inferredTag("br");
|
|
1106 |
| |
|
1107 |
0
| if (last.next != null)
|
|
1108 |
| { |
|
1109 |
0
| last.next.prev = node;
|
|
1110 |
| } |
|
1111 |
| |
|
1112 |
0
| node.next = last.next;
|
|
1113 |
0
| last.next = node;
|
|
1114 |
0
| node.prev = last;
|
|
1115 |
| |
|
1116 |
0
| if (parent.last == last)
|
|
1117 |
| { |
|
1118 |
0
| parent.last = node;
|
|
1119 |
| } |
|
1120 |
| |
|
1121 |
0
| node.parent = parent;
|
|
1122 |
| } |
|
1123 |
| else |
|
1124 |
| { |
|
1125 |
0
| Node prev = node.prev;
|
|
1126 |
0
| Node next = node.next;
|
|
1127 |
0
| Node parent = node.parent;
|
|
1128 |
0
| discardContainer(node, pnode);
|
|
1129 |
| |
|
1130 |
0
| node = lexer.inferredTag("br");
|
|
1131 |
0
| node.next = next;
|
|
1132 |
0
| node.prev = prev;
|
|
1133 |
0
| node.parent = parent;
|
|
1134 |
| |
|
1135 |
0
| if (next != null)
|
|
1136 |
| { |
|
1137 |
0
| next.prev = node;
|
|
1138 |
| } |
|
1139 |
| else |
|
1140 |
| { |
|
1141 |
0
| parent.last = node;
|
|
1142 |
| } |
|
1143 |
| |
|
1144 |
0
| if (prev != null)
|
|
1145 |
| { |
|
1146 |
0
| prev.next = node;
|
|
1147 |
| } |
|
1148 |
| else |
|
1149 |
| { |
|
1150 |
0
| parent.content = node;
|
|
1151 |
| } |
|
1152 |
| } |
|
1153 |
| |
|
1154 |
0
| return true;
|
|
1155 |
| } |
|
1156 |
0
| node.tag = this.tt.tagDiv;
|
|
1157 |
0
| node.element = "div";
|
|
1158 |
0
| addStyleProperty(node, "text-align: center");
|
|
1159 |
0
| return true;
|
|
1160 |
| } |
|
1161 |
| |
|
1162 |
673
| return false;
|
|
1163 |
| } |
|
1164 |
| |
|
1165 |
| |
|
1166 |
| |
|
1167 |
| |
|
1168 |
| |
|
1169 |
| |
|
1170 |
| |
|
1171 |
| |
|
1172 |
673
| private boolean mergeDivs(Lexer lexer, Node node)
|
|
1173 |
| { |
|
1174 |
673
| Node child;
|
|
1175 |
| |
|
1176 |
673
| if (node.tag != this.tt.tagDiv)
|
|
1177 |
| { |
|
1178 |
668
| return false;
|
|
1179 |
| } |
|
1180 |
| |
|
1181 |
5
| child = node.content;
|
|
1182 |
| |
|
1183 |
5
| if (child == null)
|
|
1184 |
| { |
|
1185 |
0
| return false;
|
|
1186 |
| } |
|
1187 |
| |
|
1188 |
5
| if (child.tag != this.tt.tagDiv)
|
|
1189 |
| { |
|
1190 |
4
| return false;
|
|
1191 |
| } |
|
1192 |
| |
|
1193 |
1
| if (child.next != null)
|
|
1194 |
| { |
|
1195 |
0
| return false;
|
|
1196 |
| } |
|
1197 |
| |
|
1198 |
1
| mergeStyles(node, child);
|
|
1199 |
1
| stripOnlyChild(node);
|
|
1200 |
1
| return true;
|
|
1201 |
| } |
|
1202 |
| |
|
1203 |
| |
|
1204 |
| |
|
1205 |
| |
|
1206 |
| |
|
1207 |
| |
|
1208 |
| |
|
1209 |
| |
|
1210 |
| |
|
1211 |
| |
|
1212 |
| |
|
1213 |
| |
|
1214 |
| |
|
1215 |
| |
|
1216 |
| |
|
1217 |
| |
|
1218 |
673
| private boolean nestedList(Lexer lexer, Node node, Node[] pnode)
|
|
1219 |
| { |
|
1220 |
673
| Node child, list;
|
|
1221 |
| |
|
1222 |
673
| if (node.tag == this.tt.tagUl || node.tag == this.tt.tagOl)
|
|
1223 |
| { |
|
1224 |
1
| child = node.content;
|
|
1225 |
| |
|
1226 |
1
| if (child == null)
|
|
1227 |
| { |
|
1228 |
0
| return false;
|
|
1229 |
| } |
|
1230 |
| |
|
1231 |
| |
|
1232 |
| |
|
1233 |
1
| if (child.next != null)
|
|
1234 |
| { |
|
1235 |
1
| return false;
|
|
1236 |
| } |
|
1237 |
| |
|
1238 |
0
| list = child.content;
|
|
1239 |
| |
|
1240 |
0
| if (list == null)
|
|
1241 |
| { |
|
1242 |
0
| return false;
|
|
1243 |
| } |
|
1244 |
| |
|
1245 |
0
| if (list.tag != node.tag)
|
|
1246 |
| { |
|
1247 |
0
| return false;
|
|
1248 |
| } |
|
1249 |
| |
|
1250 |
0
| pnode[0] = list;
|
|
1251 |
| |
|
1252 |
| |
|
1253 |
0
| list.prev = node.prev;
|
|
1254 |
0
| list.next = node.next;
|
|
1255 |
0
| list.parent = node.parent;
|
|
1256 |
0
| fixNodeLinks(list);
|
|
1257 |
| |
|
1258 |
| |
|
1259 |
| |
|
1260 |
0
| child.content = null;
|
|
1261 |
0
| node.content = null;
|
|
1262 |
0
| node.next = null;
|
|
1263 |
0
| node = null;
|
|
1264 |
| |
|
1265 |
| |
|
1266 |
| |
|
1267 |
0
| if (list.prev != null)
|
|
1268 |
| { |
|
1269 |
0
| if (list.prev.tag == this.tt.tagUl || list.prev.tag == this.tt.tagOl)
|
|
1270 |
| { |
|
1271 |
| |
|
1272 |
0
| node = list;
|
|
1273 |
0
| list = node.prev;
|
|
1274 |
| |
|
1275 |
0
| list.next = node.next;
|
|
1276 |
| |
|
1277 |
0
| if (list.next != null)
|
|
1278 |
| { |
|
1279 |
0
| list.next.prev = list;
|
|
1280 |
| } |
|
1281 |
| |
|
1282 |
0
| child = list.last;
|
|
1283 |
| |
|
1284 |
0
| node.parent = child;
|
|
1285 |
0
| node.next = null;
|
|
1286 |
0
| node.prev = child.last;
|
|
1287 |
0
| fixNodeLinks(node);
|
|
1288 |
0
| cleanNode(lexer, node);
|
|
1289 |
| } |
|
1290 |
| } |
|
1291 |
| |
|
1292 |
0
| return true;
|
|
1293 |
| } |
|
1294 |
| |
|
1295 |
672
| return false;
|
|
1296 |
| } |
|
1297 |
| |
|
1298 |
| |
|
1299 |
| |
|
1300 |
| |
|
1301 |
| |
|
1302 |
| |
|
1303 |
| |
|
1304 |
| |
|
1305 |
| |
|
1306 |
| |
|
1307 |
| |
|
1308 |
| |
|
1309 |
| |
|
1310 |
| |
|
1311 |
| |
|
1312 |
| |
|
1313 |
| |
|
1314 |
| |
|
1315 |
| |
|
1316 |
| |
|
1317 |
| |
|
1318 |
| |
|
1319 |
| |
|
1320 |
| |
|
1321 |
| |
|
1322 |
| |
|
1323 |
| |
|
1324 |
672
| private boolean blockStyle(Lexer lexer, Node node)
|
|
1325 |
| { |
|
1326 |
672
| Node child;
|
|
1327 |
| |
|
1328 |
672
| if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) != 0)
|
|
1329 |
| { |
|
1330 |
55
| if (node.tag != this.tt.tagTable && node.tag != this.tt.tagTr && node.tag != this.tt.tagLi)
|
|
1331 |
| { |
|
1332 |
| |
|
1333 |
40
| if (node.tag != this.tt.tagCaption)
|
|
1334 |
| { |
|
1335 |
40
| textAlign(lexer, node);
|
|
1336 |
| } |
|
1337 |
| |
|
1338 |
40
| child = node.content;
|
|
1339 |
| |
|
1340 |
40
| if (child == null)
|
|
1341 |
| { |
|
1342 |
6
| return false;
|
|
1343 |
| } |
|
1344 |
| |
|
1345 |
| |
|
1346 |
34
| if (child.next != null)
|
|
1347 |
| { |
|
1348 |
12
| return false;
|
|
1349 |
| } |
|
1350 |
| |
|
1351 |
22
| if (child.tag == this.tt.tagB)
|
|
1352 |
| { |
|
1353 |
2
| mergeStyles(node, child);
|
|
1354 |
2
| addStyleProperty(node, "font-weight: bold");
|
|
1355 |
2
| stripOnlyChild(node);
|
|
1356 |
2
| return true;
|
|
1357 |
| } |
|
1358 |
| |
|
1359 |
20
| if (child.tag == this.tt.tagI)
|
|
1360 |
| { |
|
1361 |
0
| mergeStyles(node, child);
|
|
1362 |
0
| addStyleProperty(node, "font-style: italic");
|
|
1363 |
0
| stripOnlyChild(node);
|
|
1364 |
0
| return true;
|
|
1365 |
| } |
|
1366 |
| |
|
1367 |
20
| if (child.tag == this.tt.tagFont)
|
|
1368 |
| { |
|
1369 |
0
| mergeStyles(node, child);
|
|
1370 |
0
| addFontStyles(node, child.attributes);
|
|
1371 |
0
| stripOnlyChild(node);
|
|
1372 |
0
| return true;
|
|
1373 |
| } |
|
1374 |
| } |
|
1375 |
| } |
|
1376 |
| |
|
1377 |
652
| return false;
|
|
1378 |
| } |
|
1379 |
| |
|
1380 |
| |
|
1381 |
| |
|
1382 |
| |
|
1383 |
| |
|
1384 |
| |
|
1385 |
| |
|
1386 |
| |
|
1387 |
| |
|
1388 |
670
| private boolean inlineStyle(Lexer lexer, Node node, Node[] pnode)
|
|
1389 |
| { |
|
1390 |
670
| Node child;
|
|
1391 |
| |
|
1392 |
670
| if (node.tag != this.tt.tagFont && (node.tag.model & (Dict.CM_INLINE | Dict.CM_ROW)) != 0)
|
|
1393 |
| { |
|
1394 |
543
| child = node.content;
|
|
1395 |
| |
|
1396 |
543
| if (child == null)
|
|
1397 |
| { |
|
1398 |
264
| return false;
|
|
1399 |
| } |
|
1400 |
| |
|
1401 |
| |
|
1402 |
279
| if (child.next != null)
|
|
1403 |
| { |
|
1404 |
5
| return false;
|
|
1405 |
| } |
|
1406 |
| |
|
1407 |
274
| if (child.tag == this.tt.tagB && lexer.configuration.logicalEmphasis)
|
|
1408 |
| { |
|
1409 |
0
| mergeStyles(node, child);
|
|
1410 |
0
| addStyleProperty(node, "font-weight: bold");
|
|
1411 |
0
| stripOnlyChild(node);
|
|
1412 |
0
| return true;
|
|
1413 |
| } |
|
1414 |
| |
|
1415 |
274
| if (child.tag == this.tt.tagI && lexer.configuration.logicalEmphasis)
|
|
1416 |
| { |
|
1417 |
0
| mergeStyles(node, child);
|
|
1418 |
0
| addStyleProperty(node, "font-style: italic");
|
|
1419 |
0
| stripOnlyChild(node);
|
|
1420 |
0
| return true;
|
|
1421 |
| } |
|
1422 |
| |
|
1423 |
274
| if (child.tag == this.tt.tagFont)
|
|
1424 |
| { |
|
1425 |
0
| mergeStyles(node, child);
|
|
1426 |
0
| addFontStyles(node, child.attributes);
|
|
1427 |
0
| stripOnlyChild(node);
|
|
1428 |
0
| return true;
|
|
1429 |
| } |
|
1430 |
| } |
|
1431 |
| |
|
1432 |
401
| return false;
|
|
1433 |
| } |
|
1434 |
| |
|
1435 |
| |
|
1436 |
| |
|
1437 |
| |
|
1438 |
| |
|
1439 |
| |
|
1440 |
| |
|
1441 |
| |
|
1442 |
| |
|
1443 |
670
| private boolean font2Span(Lexer lexer, Node node, Node[] pnode)
|
|
1444 |
| { |
|
1445 |
670
| AttVal av, style, next;
|
|
1446 |
| |
|
1447 |
670
| if (node.tag == this.tt.tagFont)
|
|
1448 |
| { |
|
1449 |
4
| if (lexer.configuration.dropFontTags)
|
|
1450 |
| { |
|
1451 |
2
| discardContainer(node, pnode);
|
|
1452 |
2
| return false;
|
|
1453 |
| } |
|
1454 |
| |
|
1455 |
| |
|
1456 |
2
| if (node.parent.content == node && node.next == null)
|
|
1457 |
| { |
|
1458 |
0
| return false;
|
|
1459 |
| } |
|
1460 |
| |
|
1461 |
2
| addFontStyles(node, node.attributes);
|
|
1462 |
| |
|
1463 |
| |
|
1464 |
2
| av = node.attributes;
|
|
1465 |
2
| style = null;
|
|
1466 |
| |
|
1467 |
2
| while (av != null)
|
|
1468 |
| { |
|
1469 |
5
| next = av.next;
|
|
1470 |
| |
|
1471 |
5
| if (av.attribute.equals("style"))
|
|
1472 |
| { |
|
1473 |
2
| av.next = null;
|
|
1474 |
2
| style = av;
|
|
1475 |
| } |
|
1476 |
| |
|
1477 |
5
| av = next;
|
|
1478 |
| } |
|
1479 |
| |
|
1480 |
2
| node.attributes = style;
|
|
1481 |
| |
|
1482 |
2
| node.tag = this.tt.tagSpan;
|
|
1483 |
2
| node.element = "span";
|
|
1484 |
| |
|
1485 |
2
| return true;
|
|
1486 |
| } |
|
1487 |
| |
|
1488 |
666
| return false;
|
|
1489 |
| } |
|
1490 |
| |
|
1491 |
| |
|
1492 |
| |
|
1493 |
| |
|
1494 |
| |
|
1495 |
| |
|
1496 |
| |
|
1497 |
1833
| private Node cleanNode(Lexer lexer, Node node)
|
|
1498 |
| { |
|
1499 |
1833
| Node next = null;
|
|
1500 |
1833
| Node[] o = new Node[1];
|
|
1501 |
1833
| boolean b = false;
|
|
1502 |
| |
|
1503 |
1833
| for (next = node; node != null && node.isElement(); node = next)
|
|
1504 |
| { |
|
1505 |
673
| o[0] = next;
|
|
1506 |
| |
|
1507 |
673
| b = dir2Div(lexer, node);
|
|
1508 |
673
| next = o[0];
|
|
1509 |
673
| if (b)
|
|
1510 |
| { |
|
1511 |
0
| continue;
|
|
1512 |
| } |
|
1513 |
| |
|
1514 |
| |
|
1515 |
| |
|
1516 |
673
| b = nestedList(lexer, node, o);
|
|
1517 |
673
| next = o[0];
|
|
1518 |
673
| if (b)
|
|
1519 |
| { |
|
1520 |
0
| return next;
|
|
1521 |
| } |
|
1522 |
| |
|
1523 |
673
| b = center2Div(lexer, node, o);
|
|
1524 |
673
| next = o[0];
|
|
1525 |
673
| if (b)
|
|
1526 |
| { |
|
1527 |
0
| continue;
|
|
1528 |
| } |
|
1529 |
| |
|
1530 |
673
| b = mergeDivs(lexer, node);
|
|
1531 |
673
| next = o[0];
|
|
1532 |
673
| if (b)
|
|
1533 |
| { |
|
1534 |
1
| continue;
|
|
1535 |
| } |
|
1536 |
| |
|
1537 |
672
| b = blockStyle(lexer, node);
|
|
1538 |
672
| next = o[0];
|
|
1539 |
672
| if (b)
|
|
1540 |
| { |
|
1541 |
2
| continue;
|
|
1542 |
| } |
|
1543 |
| |
|
1544 |
670
| b = inlineStyle(lexer, node, o);
|
|
1545 |
670
| next = o[0];
|
|
1546 |
670
| if (b)
|
|
1547 |
| { |
|
1548 |
0
| continue;
|
|
1549 |
| } |
|
1550 |
| |
|
1551 |
670
| b = font2Span(lexer, node, o);
|
|
1552 |
670
| next = o[0];
|
|
1553 |
670
| if (b)
|
|
1554 |
| { |
|
1555 |
2
| continue;
|
|
1556 |
| } |
|
1557 |
| |
|
1558 |
668
| break;
|
|
1559 |
| } |
|
1560 |
| |
|
1561 |
1833
| return next;
|
|
1562 |
| } |
|
1563 |
| |
|
1564 |
| |
|
1565 |
| |
|
1566 |
| |
|
1567 |
| |
|
1568 |
| |
|
1569 |
| |
|
1570 |
| |
|
1571 |
| |
|
1572 |
| |
|
1573 |
1833
| private Node createStyleProperties(Lexer lexer, Node node, Node[] prepl)
|
|
1574 |
| { |
|
1575 |
1833
| Node child = node.content;
|
|
1576 |
| |
|
1577 |
1833
| if (child != null)
|
|
1578 |
| { |
|
1579 |
408
| Node[] repl = new Node[1];
|
|
1580 |
408
| repl[0] = node;
|
|
1581 |
408
| while (child != null)
|
|
1582 |
| { |
|
1583 |
1816
| child = createStyleProperties(lexer, child, repl);
|
|
1584 |
1816
| if (repl[0] != node)
|
|
1585 |
| { |
|
1586 |
0
| return repl[0];
|
|
1587 |
| } |
|
1588 |
1816
| if (child != null)
|
|
1589 |
| { |
|
1590 |
1816
| child = child.next;
|
|
1591 |
| } |
|
1592 |
| } |
|
1593 |
| } |
|
1594 |
| |
|
1595 |
1833
| return cleanNode(lexer, node);
|
|
1596 |
| } |
|
1597 |
| |
|
1598 |
| |
|
1599 |
| |
|
1600 |
| |
|
1601 |
| |
|
1602 |
| |
|
1603 |
14
| private void defineStyleRules(Lexer lexer, Node node)
|
|
1604 |
| { |
|
1605 |
14
| Node child;
|
|
1606 |
| |
|
1607 |
14
| if (node.content != null)
|
|
1608 |
| { |
|
1609 |
8
| child = node.content;
|
|
1610 |
8
| while (child != null)
|
|
1611 |
| { |
|
1612 |
13
| defineStyleRules(lexer, child);
|
|
1613 |
13
| child = child.next;
|
|
1614 |
| } |
|
1615 |
| } |
|
1616 |
| |
|
1617 |
14
| style2Rule(lexer, node);
|
|
1618 |
| } |
|
1619 |
| |
|
1620 |
| |
|
1621 |
| |
|
1622 |
| |
|
1623 |
| |
|
1624 |
| |
|
1625 |
17
| public void cleanTree(Lexer lexer, Node doc)
|
|
1626 |
| { |
|
1627 |
17
| Node[] repl = new Node[1];
|
|
1628 |
17
| repl[0] = doc;
|
|
1629 |
17
| doc = createStyleProperties(lexer, doc, repl);
|
|
1630 |
| |
|
1631 |
17
| if (!lexer.configuration.makeClean)
|
|
1632 |
| { |
|
1633 |
1
| defineStyleRules(lexer, doc);
|
|
1634 |
1
| createStyleElement(lexer, doc);
|
|
1635 |
| } |
|
1636 |
| } |
|
1637 |
| |
|
1638 |
| |
|
1639 |
| |
|
1640 |
| |
|
1641 |
| |
|
1642 |
5567
| public void nestedEmphasis(Node node)
|
|
1643 |
| { |
|
1644 |
5567
| Node[] o = new Node[1];
|
|
1645 |
5567
| Node next;
|
|
1646 |
| |
|
1647 |
5567
| while (node != null)
|
|
1648 |
| { |
|
1649 |
12722
| next = node.next;
|
|
1650 |
| |
|
1651 |
12722
| if ((node.tag == this.tt.tagB || node.tag == this.tt.tagI)
|
|
1652 |
| && node.parent != null |
|
1653 |
| && node.parent.tag == node.tag) |
|
1654 |
| { |
|
1655 |
| |
|
1656 |
1
| o[0] = next;
|
|
1657 |
1
| discardContainer(node, o);
|
|
1658 |
1
| next = o[0];
|
|
1659 |
1
| node = next;
|
|
1660 |
1
| continue;
|
|
1661 |
| } |
|
1662 |
| |
|
1663 |
12721
| if (node.content != null)
|
|
1664 |
| { |
|
1665 |
5348
| nestedEmphasis(node.content);
|
|
1666 |
| } |
|
1667 |
| |
|
1668 |
12721
| node = next;
|
|
1669 |
| } |
|
1670 |
| } |
|
1671 |
| |
|
1672 |
| |
|
1673 |
| |
|
1674 |
| |
|
1675 |
| |
|
1676 |
136
| public void emFromI(Node node)
|
|
1677 |
| { |
|
1678 |
136
| while (node != null)
|
|
1679 |
| { |
|
1680 |
256
| if (node.tag == this.tt.tagI)
|
|
1681 |
| { |
|
1682 |
1
| node.element = this.tt.tagEm.name;
|
|
1683 |
1
| node.tag = this.tt.tagEm;
|
|
1684 |
| } |
|
1685 |
255
| else if (node.tag == this.tt.tagB)
|
|
1686 |
| { |
|
1687 |
0
| node.element = this.tt.tagStrong.name;
|
|
1688 |
0
| node.tag = this.tt.tagStrong;
|
|
1689 |
| } |
|
1690 |
| |
|
1691 |
256
| if (node.content != null)
|
|
1692 |
| { |
|
1693 |
127
| emFromI(node.content);
|
|
1694 |
| } |
|
1695 |
| |
|
1696 |
256
| node = node.next;
|
|
1697 |
| } |
|
1698 |
| } |
|
1699 |
| |
|
1700 |
| |
|
1701 |
| |
|
1702 |
| |
|
1703 |
| |
|
1704 |
| |
|
1705 |
5567
| public void list2BQ(Node node)
|
|
1706 |
| { |
|
1707 |
5567
| while (node != null)
|
|
1708 |
| { |
|
1709 |
12721
| if (node.content != null)
|
|
1710 |
| { |
|
1711 |
5348
| list2BQ(node.content);
|
|
1712 |
| } |
|
1713 |
| |
|
1714 |
12721
| if (node.tag != null
|
|
1715 |
| && node.tag.getParser() == ParserImpl.LIST |
|
1716 |
| && node.hasOneChild() |
|
1717 |
| && node.content.implicit) |
|
1718 |
| { |
|
1719 |
3
| stripOnlyChild(node);
|
|
1720 |
3
| node.element = this.tt.tagBlockquote.name;
|
|
1721 |
3
| node.tag = this.tt.tagBlockquote;
|
|
1722 |
3
| node.implicit = true;
|
|
1723 |
| } |
|
1724 |
| |
|
1725 |
12721
| node = node.next;
|
|
1726 |
| } |
|
1727 |
| } |
|
1728 |
| |
|
1729 |
| |
|
1730 |
| |
|
1731 |
| |
|
1732 |
| |
|
1733 |
| |
|
1734 |
5564
| public void bQ2Div(Node node)
|
|
1735 |
| { |
|
1736 |
5564
| int indent;
|
|
1737 |
5564
| String indentBuf;
|
|
1738 |
5564
| AttVal attval;
|
|
1739 |
| |
|
1740 |
5564
| while (node != null)
|
|
1741 |
| { |
|
1742 |
12718
| if (node.tag == this.tt.tagBlockquote && node.implicit)
|
|
1743 |
| { |
|
1744 |
3
| indent = 1;
|
|
1745 |
| |
|
1746 |
3
| while (node.hasOneChild() && node.content.tag == this.tt.tagBlockquote && node.implicit)
|
|
1747 |
| { |
|
1748 |
0
| ++indent;
|
|
1749 |
0
| stripOnlyChild(node);
|
|
1750 |
| } |
|
1751 |
| |
|
1752 |
3
| if (node.content != null)
|
|
1753 |
| { |
|
1754 |
3
| bQ2Div(node.content);
|
|
1755 |
| } |
|
1756 |
| |
|
1757 |
3
| indentBuf = "margin-left: " + (new Integer(2 * indent)).toString() + "em";
|
|
1758 |
| |
|
1759 |
3
| node.element = this.tt.tagDiv.name;
|
|
1760 |
3
| node.tag = this.tt.tagDiv;
|
|
1761 |
| |
|
1762 |
3
| attval = node.getAttrByName("style");
|
|
1763 |
| |
|
1764 |
3
| if (attval != null && attval.value != null)
|
|
1765 |
| { |
|
1766 |
2
| attval.value = indentBuf + "; " + attval.value;
|
|
1767 |
| } |
|
1768 |
| else |
|
1769 |
| { |
|
1770 |
1
| node.addAttribute("style", indentBuf);
|
|
1771 |
| } |
|
1772 |
| } |
|
1773 |
12715
| else if (node.content != null)
|
|
1774 |
| { |
|
1775 |
5342
| bQ2Div(node.content);
|
|
1776 |
| } |
|
1777 |
| |
|
1778 |
12718
| node = node.next;
|
|
1779 |
| } |
|
1780 |
| } |
|
1781 |
| |
|
1782 |
| |
|
1783 |
| |
|
1784 |
| |
|
1785 |
| |
|
1786 |
| |
|
1787 |
0
| Node findEnclosingCell(Node node)
|
|
1788 |
| { |
|
1789 |
0
| Node check;
|
|
1790 |
| |
|
1791 |
0
| for (check = node; check != null; check = check.parent)
|
|
1792 |
| { |
|
1793 |
0
| if (check.tag == tt.tagTd)
|
|
1794 |
| { |
|
1795 |
0
| return check;
|
|
1796 |
| } |
|
1797 |
| } |
|
1798 |
0
| return null;
|
|
1799 |
| } |
|
1800 |
| |
|
1801 |
| |
|
1802 |
| |
|
1803 |
| |
|
1804 |
| |
|
1805 |
| |
|
1806 |
| |
|
1807 |
23
| public Node pruneSection(Lexer lexer, Node node)
|
|
1808 |
| { |
|
1809 |
23
| for (;;)
|
|
1810 |
| { |
|
1811 |
| |
|
1812 |
| |
|
1813 |
| |
|
1814 |
| |
|
1815 |
| |
|
1816 |
| |
|
1817 |
| |
|
1818 |
| |
|
1819 |
| |
|
1820 |
| |
|
1821 |
| |
|
1822 |
| |
|
1823 |
| |
|
1824 |
| |
|
1825 |
| |
|
1826 |
| |
|
1827 |
62
| node = Node.discardElement(node);
|
|
1828 |
| |
|
1829 |
62
| if (node == null)
|
|
1830 |
| { |
|
1831 |
0
| return null;
|
|
1832 |
| } |
|
1833 |
| |
|
1834 |
62
| if (node.type == Node.SECTION_TAG)
|
|
1835 |
| { |
|
1836 |
23
| if ((TidyUtils.getString(node.textarray, node.start, 2)).equals("if"))
|
|
1837 |
| { |
|
1838 |
0
| node = pruneSection(lexer, node);
|
|
1839 |
0
| continue;
|
|
1840 |
| } |
|
1841 |
| |
|
1842 |
23
| if ((TidyUtils.getString(node.textarray, node.start, 5)).equals("endif"))
|
|
1843 |
| { |
|
1844 |
23
| node = Node.discardElement(node);
|
|
1845 |
23
| break;
|
|
1846 |
| } |
|
1847 |
| } |
|
1848 |
| } |
|
1849 |
| |
|
1850 |
23
| return node;
|
|
1851 |
| } |
|
1852 |
| |
|
1853 |
| |
|
1854 |
| |
|
1855 |
| |
|
1856 |
| |
|
1857 |
| |
|
1858 |
130
| public void dropSections(Lexer lexer, Node node)
|
|
1859 |
| { |
|
1860 |
130
| while (node != null)
|
|
1861 |
| { |
|
1862 |
253
| if (node.type == Node.SECTION_TAG)
|
|
1863 |
| { |
|
1864 |
| |
|
1865 |
25
| if ((TidyUtils.getString(node.textarray, node.start, 2)).equals("if")
|
|
1866 |
| && (!(TidyUtils.getString(node.textarray, node.start, 7)).equals("if !vml"))) |
|
1867 |
| |
|
1868 |
| { |
|
1869 |
23
| node = pruneSection(lexer, node);
|
|
1870 |
23
| continue;
|
|
1871 |
| } |
|
1872 |
| |
|
1873 |
| |
|
1874 |
2
| node = Node.discardElement(node);
|
|
1875 |
2
| continue;
|
|
1876 |
| } |
|
1877 |
| |
|
1878 |
228
| if (node.content != null)
|
|
1879 |
| { |
|
1880 |
124
| dropSections(lexer, node.content);
|
|
1881 |
| } |
|
1882 |
| |
|
1883 |
228
| node = node.next;
|
|
1884 |
| } |
|
1885 |
| } |
|
1886 |
| |
|
1887 |
| |
|
1888 |
| |
|
1889 |
| |
|
1890 |
| |
|
1891 |
156
| public void purgeWord2000Attributes(Node node)
|
|
1892 |
| { |
|
1893 |
156
| AttVal attr = null;
|
|
1894 |
156
| AttVal next = null;
|
|
1895 |
156
| AttVal prev = null;
|
|
1896 |
| |
|
1897 |
156
| for (attr = node.attributes; attr != null; attr = next)
|
|
1898 |
| { |
|
1899 |
96
| next = attr.next;
|
|
1900 |
| |
|
1901 |
| |
|
1902 |
| |
|
1903 |
96
| if (attr.attribute != null && attr.value != null && attr.attribute.equals("class"))
|
|
1904 |
| { |
|
1905 |
45
| if (attr.value.equals("Code") || !attr.value.startsWith("Mso"))
|
|
1906 |
| { |
|
1907 |
3
| prev = attr;
|
|
1908 |
3
| continue;
|
|
1909 |
| } |
|
1910 |
| } |
|
1911 |
| |
|
1912 |
93
| if (attr.attribute != null
|
|
1913 |
| && (attr.attribute.equals("class") |
|
1914 |
| || attr.attribute.equals("style") |
|
1915 |
| || attr.attribute.equals("lang") |
|
1916 |
| || attr.attribute.startsWith("x:") || ((attr.attribute.equals("height") || attr.attribute |
|
1917 |
| .equals("width")) && |
|
1918 |
| (node.tag == this.tt.tagTd || node.tag == this.tt.tagTr || node.tag == this.tt.tagTh)))) |
|
1919 |
| { |
|
1920 |
79
| if (prev != null)
|
|
1921 |
| { |
|
1922 |
4
| prev.next = next;
|
|
1923 |
| } |
|
1924 |
| else |
|
1925 |
| { |
|
1926 |
75
| node.attributes = next;
|
|
1927 |
| } |
|
1928 |
| |
|
1929 |
| } |
|
1930 |
| else |
|
1931 |
| { |
|
1932 |
14
| prev = attr;
|
|
1933 |
| } |
|
1934 |
| } |
|
1935 |
| } |
|
1936 |
| |
|
1937 |
| |
|
1938 |
| |
|
1939 |
| |
|
1940 |
| |
|
1941 |
| |
|
1942 |
| |
|
1943 |
32
| public Node stripSpan(Lexer lexer, Node span)
|
|
1944 |
| { |
|
1945 |
32
| Node node;
|
|
1946 |
32
| Node prev = null;
|
|
1947 |
32
| Node content;
|
|
1948 |
| |
|
1949 |
| |
|
1950 |
| |
|
1951 |
| |
|
1952 |
32
| cleanWord2000(lexer, span.content);
|
|
1953 |
32
| content = span.content;
|
|
1954 |
| |
|
1955 |
32
| if (span.prev != null)
|
|
1956 |
| { |
|
1957 |
14
| prev = span.prev;
|
|
1958 |
| } |
|
1959 |
18
| else if (content != null)
|
|
1960 |
| { |
|
1961 |
12
| node = content;
|
|
1962 |
12
| content = content.next;
|
|
1963 |
12
| node.removeNode();
|
|
1964 |
12
| Node.insertNodeBeforeElement(span, node);
|
|
1965 |
12
| prev = node;
|
|
1966 |
| } |
|
1967 |
| |
|
1968 |
32
| while (content != null)
|
|
1969 |
| { |
|
1970 |
28
| node = content;
|
|
1971 |
28
| content = content.next;
|
|
1972 |
28
| node.removeNode();
|
|
1973 |
28
| prev.insertNodeAfterElement(node);
|
|
1974 |
28
| prev = node;
|
|
1975 |
| } |
|
1976 |
| |
|
1977 |
32
| if (span.next == null)
|
|
1978 |
| { |
|
1979 |
19
| span.parent.last = prev;
|
|
1980 |
| } |
|
1981 |
| |
|
1982 |
32
| node = span.next;
|
|
1983 |
32
| span.content = null;
|
|
1984 |
32
| Node.discardElement(span);
|
|
1985 |
32
| return node;
|
|
1986 |
| } |
|
1987 |
| |
|
1988 |
| |
|
1989 |
| |
|
1990 |
| |
|
1991 |
| |
|
1992 |
| |
|
1993 |
0
| private void normalizeSpaces(Lexer lexer, Node node)
|
|
1994 |
| { |
|
1995 |
0
| while (node != null)
|
|
1996 |
| { |
|
1997 |
0
| if (node.content != null)
|
|
1998 |
| { |
|
1999 |
0
| normalizeSpaces(lexer, node.content);
|
|
2000 |
| } |
|
2001 |
| |
|
2002 |
0
| if (node.type == Node.TEXT_NODE)
|
|
2003 |
| { |
|
2004 |
0
| int i;
|
|
2005 |
0
| int[] c = new int[1];
|
|
2006 |
0
| int p = node.start;
|
|
2007 |
| |
|
2008 |
0
| for (i = node.start; i < node.end; ++i)
|
|
2009 |
| { |
|
2010 |
0
| c[0] = node.textarray[i];
|
|
2011 |
| |
|
2012 |
| |
|
2013 |
0
| if (c[0] > 0x7F)
|
|
2014 |
| { |
|
2015 |
0
| i += PPrint.getUTF8(node.textarray, i, c);
|
|
2016 |
| } |
|
2017 |
| |
|
2018 |
0
| if (c[0] == 160)
|
|
2019 |
| { |
|
2020 |
0
| c[0] = ' ';
|
|
2021 |
| } |
|
2022 |
| |
|
2023 |
0
| p = PPrint.putUTF8(node.textarray, p, c[0]);
|
|
2024 |
| } |
|
2025 |
| } |
|
2026 |
| |
|
2027 |
0
| node = node.next;
|
|
2028 |
| } |
|
2029 |
| } |
|
2030 |
| |
|
2031 |
| |
|
2032 |
| |
|
2033 |
| |
|
2034 |
| |
|
2035 |
| |
|
2036 |
37
| boolean noMargins(Node node)
|
|
2037 |
| { |
|
2038 |
37
| AttVal attval = node.getAttrByName("style");
|
|
2039 |
| |
|
2040 |
37
| if (attval == null || attval.value == null)
|
|
2041 |
| { |
|
2042 |
24
| return false;
|
|
2043 |
| } |
|
2044 |
| |
|
2045 |
| |
|
2046 |
13
| if (attval.value.indexOf("margin-top: 0") == -1)
|
|
2047 |
| { |
|
2048 |
13
| return false;
|
|
2049 |
| } |
|
2050 |
| |
|
2051 |
| |
|
2052 |
0
| if (attval.value.indexOf("margin-bottom: 0") == -1)
|
|
2053 |
| { |
|
2054 |
0
| return false;
|
|
2055 |
| } |
|
2056 |
| |
|
2057 |
0
| return true;
|
|
2058 |
| } |
|
2059 |
| |
|
2060 |
| |
|
2061 |
| |
|
2062 |
| |
|
2063 |
| |
|
2064 |
| |
|
2065 |
| |
|
2066 |
46
| boolean singleSpace(Lexer lexer, Node node)
|
|
2067 |
| { |
|
2068 |
46
| if (node.content != null)
|
|
2069 |
| { |
|
2070 |
45
| node = node.content;
|
|
2071 |
| |
|
2072 |
45
| if (node.next != null)
|
|
2073 |
| { |
|
2074 |
8
| return false;
|
|
2075 |
| } |
|
2076 |
| |
|
2077 |
37
| if (node.type != Node.TEXT_NODE)
|
|
2078 |
| { |
|
2079 |
14
| return false;
|
|
2080 |
| } |
|
2081 |
| |
|
2082 |
23
| if (((node.end - node.start) == 1) && lexer.lexbuf[node.start] == ' ')
|
|
2083 |
| { |
|
2084 |
0
| return true;
|
|
2085 |
| } |
|
2086 |
| |
|
2087 |
23
| if ((node.end - node.start) == 2)
|
|
2088 |
| { |
|
2089 |
8
| int[] c = new int[1];
|
|
2090 |
| |
|
2091 |
8
| PPrint.getUTF8(lexer.lexbuf, node.start, c);
|
|
2092 |
| |
|
2093 |
8
| if (c[0] == 160)
|
|
2094 |
| { |
|
2095 |
5
| return true;
|
|
2096 |
| } |
|
2097 |
| } |
|
2098 |
| } |
|
2099 |
| |
|
2100 |
19
| return false;
|
|
2101 |
| } |
|
2102 |
| |
|
2103 |
| |
|
2104 |
| |
|
2105 |
| |
|
2106 |
| |
|
2107 |
| |
|
2108 |
| |
|
2109 |
| |
|
2110 |
198
| public void cleanWord2000(Lexer lexer, Node node)
|
|
2111 |
| { |
|
2112 |
| |
|
2113 |
198
| Node list = null;
|
|
2114 |
| |
|
2115 |
198
| while (node != null)
|
|
2116 |
| { |
|
2117 |
| |
|
2118 |
| |
|
2119 |
340
| if (node.tag == tt.tagHtml)
|
|
2120 |
| { |
|
2121 |
| |
|
2122 |
6
| if ((node.getAttrByName("xmlns:o") == null))
|
|
2123 |
| { |
|
2124 |
0
| return;
|
|
2125 |
| } |
|
2126 |
6
| lexer.configuration.tt.freeAttrs(node);
|
|
2127 |
| } |
|
2128 |
| |
|
2129 |
| |
|
2130 |
340
| if (node.tag == tt.tagP)
|
|
2131 |
| { |
|
2132 |
37
| if (noMargins(node))
|
|
2133 |
| { |
|
2134 |
0
| Node pre;
|
|
2135 |
0
| Node next;
|
|
2136 |
0
| Node.coerceNode(lexer, node, tt.tagPre);
|
|
2137 |
| |
|
2138 |
0
| purgeWord2000Attributes(node);
|
|
2139 |
| |
|
2140 |
0
| if (node.content != null)
|
|
2141 |
| { |
|
2142 |
0
| cleanWord2000(lexer, node.content);
|
|
2143 |
| } |
|
2144 |
| |
|
2145 |
0
| pre = node;
|
|
2146 |
0
| node = node.next;
|
|
2147 |
| |
|
2148 |
| |
|
2149 |
0
| while (node.tag == tt.tagP && noMargins(node))
|
|
2150 |
| { |
|
2151 |
0
| next = node.next;
|
|
2152 |
0
| node.removeNode();
|
|
2153 |
0
| pre.insertNodeAtEnd(lexer.newLineNode());
|
|
2154 |
0
| pre.insertNodeAtEnd(node);
|
|
2155 |
0
| stripSpan(lexer, node);
|
|
2156 |
0
| node = next;
|
|
2157 |
| } |
|
2158 |
| |
|
2159 |
0
| if (node == null)
|
|
2160 |
| { |
|
2161 |
0
| break;
|
|
2162 |
| } |
|
2163 |
| } |
|
2164 |
| } |
|
2165 |
| |
|
2166 |
340
| if (node.tag != null && TidyUtils.toBoolean(node.tag.model & Dict.CM_BLOCK) && singleSpace(lexer, node))
|
|
2167 |
| { |
|
2168 |
5
| node = stripSpan(lexer, node);
|
|
2169 |
5
| continue;
|
|
2170 |
| } |
|
2171 |
| |
|
2172 |
| |
|
2173 |
335
| if (node.tag == this.tt.tagStyle || node.tag == this.tt.tagMeta || node.type == Node.COMMENT_TAG)
|
|
2174 |
| { |
|
2175 |
29
| node = Node.discardElement(node);
|
|
2176 |
29
| continue;
|
|
2177 |
| } |
|
2178 |
| |
|
2179 |
| |
|
2180 |
306
| if (node.tag == this.tt.tagSpan || node.tag == this.tt.tagFont)
|
|
2181 |
| { |
|
2182 |
27
| node = stripSpan(lexer, node);
|
|
2183 |
27
| continue;
|
|
2184 |
| } |
|
2185 |
| |
|
2186 |
279
| if (node.tag == this.tt.tagLink)
|
|
2187 |
| { |
|
2188 |
5
| AttVal attr = node.getAttrByName("rel");
|
|
2189 |
| |
|
2190 |
5
| if (attr != null && attr.value != null && attr.value.equals("File-List"))
|
|
2191 |
| { |
|
2192 |
4
| node = Node.discardElement(node);
|
|
2193 |
4
| continue;
|
|
2194 |
| } |
|
2195 |
| } |
|
2196 |
| |
|
2197 |
| |
|
2198 |
275
| if (node.content == null && node.tag == this.tt.tagP)
|
|
2199 |
| { |
|
2200 |
1
| node = Node.discardElement(node);
|
|
2201 |
1
| continue;
|
|
2202 |
| } |
|
2203 |
| |
|
2204 |
274
| if (node.tag == this.tt.tagP)
|
|
2205 |
| { |
|
2206 |
31
| AttVal attr = node.getAttrByName("class");
|
|
2207 |
31
| AttVal atrStyle = node.getAttrByName("style");
|
|
2208 |
| |
|
2209 |
| |
|
2210 |
| |
|
2211 |
| |
|
2212 |
| |
|
2213 |
| |
|
2214 |
| |
|
2215 |
31
| if (attr != null
|
|
2216 |
| && attr.value != null |
|
2217 |
| && ((attr.value.equals("MsoListBullet") || attr.value.equals("MsoListNumber")) |
|
2218 |
| || (atrStyle != null && (atrStyle.value.indexOf("mso-list:") != -1)))) |
|
2219 |
| |
|
2220 |
| { |
|
2221 |
15
| Dict listType = tt.tagUl;
|
|
2222 |
| |
|
2223 |
15
| if (attr.value.equals("MsoListNumber"))
|
|
2224 |
| { |
|
2225 |
0
| listType = tt.tagOl;
|
|
2226 |
| } |
|
2227 |
| |
|
2228 |
15
| Node.coerceNode(lexer, node, this.tt.tagLi);
|
|
2229 |
| |
|
2230 |
15
| if (list == null || list.tag != listType)
|
|
2231 |
| { |
|
2232 |
3
| list = lexer.inferredTag(listType.name);
|
|
2233 |
3
| Node.insertNodeBeforeElement(node, list);
|
|
2234 |
| } |
|
2235 |
| |
|
2236 |
15
| purgeWord2000Attributes(node);
|
|
2237 |
| |
|
2238 |
15
| if (node.content != null)
|
|
2239 |
| { |
|
2240 |
15
| cleanWord2000(lexer, node.content);
|
|
2241 |
| } |
|
2242 |
| |
|
2243 |
| |
|
2244 |
15
| node.removeNode();
|
|
2245 |
15
| list.insertNodeAtEnd(node);
|
|
2246 |
15
| node = list;
|
|
2247 |
| } |
|
2248 |
| |
|
2249 |
16
| else if (attr != null && attr.value != null && attr.value.equals("Code"))
|
|
2250 |
| { |
|
2251 |
0
| Node br = lexer.newLineNode();
|
|
2252 |
0
| normalizeSpaces(lexer, node);
|
|
2253 |
| |
|
2254 |
0
| if (list == null || list.tag != this.tt.tagPre)
|
|
2255 |
| { |
|
2256 |
0
| list = lexer.inferredTag("pre");
|
|
2257 |
0
| Node.insertNodeBeforeElement(node, list);
|
|
2258 |
| } |
|
2259 |
| |
|
2260 |
| |
|
2261 |
0
| node.removeNode();
|
|
2262 |
0
| list.insertNodeAtEnd(node);
|
|
2263 |
0
| stripSpan(lexer, node);
|
|
2264 |
0
| list.insertNodeAtEnd(br);
|
|
2265 |
0
| node = list.next;
|
|
2266 |
| } |
|
2267 |
| else |
|
2268 |
| { |
|
2269 |
16
| list = null;
|
|
2270 |
| } |
|
2271 |
| } |
|
2272 |
| else |
|
2273 |
| { |
|
2274 |
243
| list = null;
|
|
2275 |
| } |
|
2276 |
| |
|
2277 |
| |
|
2278 |
274
| if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
|
|
2279 |
| { |
|
2280 |
141
| purgeWord2000Attributes(node);
|
|
2281 |
| } |
|
2282 |
| |
|
2283 |
274
| if (node.content != null)
|
|
2284 |
| { |
|
2285 |
145
| cleanWord2000(lexer, node.content);
|
|
2286 |
| } |
|
2287 |
| |
|
2288 |
274
| node = node.next;
|
|
2289 |
| } |
|
2290 |
| } |
|
2291 |
| |
|
2292 |
| |
|
2293 |
| |
|
2294 |
| |
|
2295 |
| |
|
2296 |
| |
|
2297 |
8
| public boolean isWord2000(Node root)
|
|
2298 |
| { |
|
2299 |
8
| AttVal attval;
|
|
2300 |
8
| Node node;
|
|
2301 |
8
| Node head;
|
|
2302 |
8
| Node html = root.findHTML(this.tt);
|
|
2303 |
| |
|
2304 |
8
| if (html != null && html.getAttrByName("xmlns:o") != null)
|
|
2305 |
| { |
|
2306 |
6
| return true;
|
|
2307 |
| } |
|
2308 |
| |
|
2309 |
| |
|
2310 |
2
| head = root.findHEAD(tt);
|
|
2311 |
| |
|
2312 |
2
| if (head != null)
|
|
2313 |
| { |
|
2314 |
2
| for (node = head.content; node != null; node = node.next)
|
|
2315 |
| { |
|
2316 |
2
| if (node.tag != tt.tagMeta)
|
|
2317 |
| { |
|
2318 |
2
| continue;
|
|
2319 |
| } |
|
2320 |
| |
|
2321 |
0
| attval = node.getAttrByName("name");
|
|
2322 |
| |
|
2323 |
0
| if (attval == null || attval.value == null)
|
|
2324 |
| { |
|
2325 |
0
| continue;
|
|
2326 |
| } |
|
2327 |
| |
|
2328 |
0
| if (!"generator".equals(attval.value))
|
|
2329 |
| { |
|
2330 |
0
| continue;
|
|
2331 |
| } |
|
2332 |
| |
|
2333 |
0
| attval = node.getAttrByName("content");
|
|
2334 |
| |
|
2335 |
0
| if (attval == null || attval.value == null)
|
|
2336 |
| { |
|
2337 |
0
| continue;
|
|
2338 |
| } |
|
2339 |
| |
|
2340 |
0
| if (attval.value.indexOf("Microsoft") != -1)
|
|
2341 |
| { |
|
2342 |
0
| return true;
|
|
2343 |
| } |
|
2344 |
| } |
|
2345 |
| } |
|
2346 |
| |
|
2347 |
2
| return false;
|
|
2348 |
| } |
|
2349 |
| |
|
2350 |
| |
|
2351 |
| |
|
2352 |
| |
|
2353 |
| |
|
2354 |
| |
|
2355 |
217
| static void bumpObject(Lexer lexer, Node html)
|
|
2356 |
| { |
|
2357 |
217
| if (html == null)
|
|
2358 |
| { |
|
2359 |
0
| return;
|
|
2360 |
| } |
|
2361 |
| |
|
2362 |
217
| Node node, next, head = null, body = null;
|
|
2363 |
217
| TagTable tt = lexer.configuration.tt;
|
|
2364 |
217
| for (node = html.content; node != null; node = node.next)
|
|
2365 |
| { |
|
2366 |
437
| if (node.tag == tt.tagHead)
|
|
2367 |
| { |
|
2368 |
213
| head = node;
|
|
2369 |
| } |
|
2370 |
| |
|
2371 |
437
| if (node.tag == tt.tagBody)
|
|
2372 |
| { |
|
2373 |
218
| body = node;
|
|
2374 |
| } |
|
2375 |
| } |
|
2376 |
| |
|
2377 |
217
| if (head != null && body != null)
|
|
2378 |
| { |
|
2379 |
213
| for (node = head.content; node != null; node = next)
|
|
2380 |
| { |
|
2381 |
312
| next = node.next;
|
|
2382 |
| |
|
2383 |
312
| if (node.tag == tt.tagObject)
|
|
2384 |
| { |
|
2385 |
1
| Node child;
|
|
2386 |
1
| boolean bump = false;
|
|
2387 |
| |
|
2388 |
1
| for (child = node.content; child != null; child = child.next)
|
|
2389 |
| { |
|
2390 |
| |
|
2391 |
1
| if ((child.type == Node.TEXT_NODE && !node.isBlank(lexer)) || child.tag != tt.tagParam)
|
|
2392 |
| { |
|
2393 |
1
| bump = true;
|
|
2394 |
1
| break;
|
|
2395 |
| } |
|
2396 |
| } |
|
2397 |
| |
|
2398 |
1
| if (bump)
|
|
2399 |
| { |
|
2400 |
1
| node.removeNode();
|
|
2401 |
1
| body.insertNodeAtStart(node);
|
|
2402 |
| } |
|
2403 |
| } |
|
2404 |
| } |
|
2405 |
| } |
|
2406 |
| } |
|
2407 |
| |
|
2408 |
| } |