package com.jbidwatcher.util.html;
/*
* Copyright (c) 2000-2007, CyberFOX Software, Inc. All Rights Reserved.
*
* Developed by mrs (Morgan Schweers)
*/
import com.jbidwatcher.util.config.JConfig;
import com.jbidwatcher.util.xml.XMLElement;
import java.util.Vector;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
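/**
* Tokenizer for HTML text held in a StringBuffer: parse() walks the buffer
* and hands tag text and content text to addToken().
*
* Created by IntelliJ IDEA.
* User: Administrator
* Date: Jun 26, 2004
* Time: 2:34:56 PM
*
*/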
public class JHTMLParser {
/** Token store; setup() points this at a fresh Vector. */
private List m_tokens;
/** Listener supplied to the constructor. Fields default to null, so no explicit initializer is needed. */
private JHTMLListener m_notify;
/** Compile-time switch guarding the extra substring diagnostics that parse() can log. */
private static final boolean do_uber_debug = false;
/**
 * Creates a parser, stores the listener, and immediately parses the
 * supplied buffer.
 *
 * Note: parse() is protected and is invoked here, during construction.
 *
 * @param sb     the text to parse; passed straight to parse()
 * @param notify stored as this parser's listener
 */
public JHTMLParser(StringBuffer sb, JHTMLListener notify) {
  // Listener assignment and token-list setup are shared with the
  // one-argument constructor, so delegate to it first.
  this(notify);
  parse(sb);
}
/**
 * Creates a parser that stores the listener and prepares an empty token
 * list, without parsing anything yet.
 *
 * @param notify stored as this parser's listener
 */
public JHTMLParser(JHTMLListener notify) {
  this.m_notify = notify;
  this.setup();
}
/**
 * Initialises the token list with a new, empty Vector. Called from both
 * constructors.
 */
private void setup() {
  this.m_tokens = new Vector();
}
protected void parse(StringBuffer trueBuffer) {
boolean inQuote=false, inTag=false, inComment=false;
char ch, prev = '\0', next = '\0';
StringBuffer sb;
Matcher m;
if(JConfig.queryConfiguration("ebay.titleFix", "true").equals("true")) {
sb = new StringBuffer(trueBuffer.length());
m = Pattern.compile("(.*)").matcher(trueBuffer);
String quotedTitle = null;
while(m.find()) {
if(quotedTitle == null) quotedTitle = "" + XMLElement.encodeString(m.group(1)) + "";
m.appendReplacement(sb, Matcher.quoteReplacement(quotedTitle));
}
m.appendTail(sb);
trueBuffer = sb;
}
sb = new StringBuffer(trueBuffer.length());
m = Pattern.compile("(|)").matcher(trueBuffer);
while(m.find()) {
m.appendReplacement(sb, "");
}
m.appendTail(sb);
trueBuffer = sb;
int bufLen = trueBuffer.length();
boolean spitNextTag = false;
int start = 0;
int firstClose = 0;
boolean suspicious = false;
for(int charStep = 0; charStep1) prev = trueBuffer.charAt(charStep-1);
if(charStep<(bufLen-1)) next = trueBuffer.charAt(charStep+1);
if(inTag) {
// quoting disabled inside of comment
if(!inComment) {
if(inQuote && ch == '>') {
suspicious = true;
if(JConfig.debugging) {
int pre_nl=0, post_nl=0, i;
for(i=charStep-1; pre_nl == 0 && i>0 && i>(charStep-40); i--) if(trueBuffer.charAt(i) == '\n') pre_nl = i+1;
if(pre_nl == 0) pre_nl = i;
for(i=charStep+1; post_nl == 0 && i1) prev = trueBuffer.charAt(charStep-1);
if(charStep<(bufLen-1)) next = trueBuffer.charAt(charStep+1);
inQuote = false;
suspicious = false;
JConfig.log().logDebug("Potential quote error!");
spitNextTag = true;
}
// This prevents opening a quote at the end of a tag.
if(!inQuote && prev != '=' && next == '>') {
if(JConfig.queryConfiguration("show.badhtml", "false").equals("true")) {
JConfig.log().logDebug("Quote error!");
}
spitNextTag = true;
} else {
inQuote = !inQuote;
}
}
}
// parsing disabled inside of quoted string
if(!inQuote) {
// end Tag and start Content
if(ch == '>') {
if(!inComment) {
// We've ended a tag, outside a quote. It's all good.
if(suspicious) suspicious = false;
if(charStep < start) {
if(do_uber_debug) {
JConfig.log().logDebug("substring(" + start + ", " + charStep + ") of " + trueBuffer.length());
JConfig.log().logDebug("FAILURE @\n-------------------\n" + trueBuffer.substring(charStep, start));
}
}
addToken(trueBuffer.substring(start, charStep), htmlToken.HTML_TAG);
if(spitNextTag) {
if(JConfig.queryConfiguration("show.badhtml", "false").equals("true")) {
JConfig.log().logDebug("Added 'bad' tag: <" + trueBuffer.substring(start, charStep) + ">");
}
spitNextTag = false;
}
} else {
// Comment ends with "-->"
inComment = (prev != '-') || (trueBuffer.charAt(charStep-2) != '-');
}
inTag = inComment;
if(!inTag) start = charStep+1; // start of content
}
}
} else {
// in Content
if(ch == '<') {
// end Content and start Tag
if(start != charStep) {
String whatToAdd = trueBuffer.substring(start, charStep);
String trimmed = whatToAdd.trim();
if(!trimmed.equals("")) {
addToken(whatToAdd, htmlToken.HTML_CONTENT);
}
}
inTag = true;
// Comments begin with "