package com.jbidwatcher.util.html; /* * Copyright (c) 2000-2007, CyberFOX Software, Inc. All Rights Reserved. * * Developed by mrs (Morgan Schweers) */ import com.jbidwatcher.util.config.JConfig; import com.jbidwatcher.util.xml.XMLElement; import java.util.Vector; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Created by IntelliJ IDEA. * User: Administrator * Date: Jun 26, 2004 * Time: 2:34:56 PM * */ public class JHTMLParser { private List m_tokens; private JHTMLListener m_notify = null; private final static boolean do_uber_debug = false; public JHTMLParser(StringBuffer sb, JHTMLListener notify) { m_notify = notify; setup(); parse(sb); } public JHTMLParser(JHTMLListener notify) { m_notify = notify; setup(); } private void setup() { m_tokens = new Vector(); } protected void parse(StringBuffer trueBuffer) { boolean inQuote=false, inTag=false, inComment=false; char ch, prev = '\0', next = '\0'; StringBuffer sb; Matcher m; if(JConfig.queryConfiguration("ebay.titleFix", "true").equals("true")) { sb = new StringBuffer(trueBuffer.length()); m = Pattern.compile("(.*)").matcher(trueBuffer); String quotedTitle = null; while(m.find()) { if(quotedTitle == null) quotedTitle = "" + XMLElement.encodeString(m.group(1)) + ""; m.appendReplacement(sb, Matcher.quoteReplacement(quotedTitle)); } m.appendTail(sb); trueBuffer = sb; } sb = new StringBuffer(trueBuffer.length()); m = Pattern.compile("(|)").matcher(trueBuffer); while(m.find()) { m.appendReplacement(sb, ""); } m.appendTail(sb); trueBuffer = sb; int bufLen = trueBuffer.length(); boolean spitNextTag = false; int start = 0; int firstClose = 0; boolean suspicious = false; for(int charStep = 0; charStep1) prev = trueBuffer.charAt(charStep-1); if(charStep<(bufLen-1)) next = trueBuffer.charAt(charStep+1); if(inTag) { // quoting disabled inside of comment if(!inComment) { if(inQuote && ch == '>') { suspicious = true; if(JConfig.debugging) { int pre_nl=0, post_nl=0, i; for(i=charStep-1; pre_nl == 0 && i>0 && i>(charStep-40); i--) if(trueBuffer.charAt(i) == '\n') pre_nl = i+1; if(pre_nl == 0) pre_nl = i; for(i=charStep+1; post_nl == 0 && i') { if(JConfig.queryConfiguration("show.badhtml", "false").equals("true")) { JConfig.log().logDebug("Quote error!"); } spitNextTag = true; } else { inQuote = !inQuote; } } } // parsing disabled inside of quoted string if(!inQuote) { // end Tag and start Content if(ch == '>') { if(!inComment) { // We've ended a tag, outside a quote. It's all good. if(suspicious) suspicious = false; if(charStep < start) { if(do_uber_debug) { JConfig.log().logDebug("substring(" + start + ", " + charStep + ") of " + trueBuffer.length()); JConfig.log().logDebug("FAILURE @\n-------------------\n" + trueBuffer.substring(charStep, start)); } } addToken(trueBuffer.substring(start, charStep), htmlToken.HTML_TAG); if(spitNextTag) { if(JConfig.queryConfiguration("show.badhtml", "false").equals("true")) { JConfig.log().logDebug("Added 'bad' tag: <" + trueBuffer.substring(start, charStep) + ">"); } spitNextTag = false; } } else { // Comment ends with "-->" inComment = (prev != '-') || (trueBuffer.charAt(charStep-2) != '-'); } inTag = inComment; if(!inTag) start = charStep+1; // start of content } } } else { // in Content if(ch == '<') { // end Content and start Tag if(start != charStep) { String whatToAdd = trueBuffer.substring(start, charStep); String trimmed = whatToAdd.trim(); if(!trimmed.equals("")) { addToken(whatToAdd, htmlToken.HTML_CONTENT); } } inTag = true; // Comments begin with "