/*
 * AnalyseResponse.java
 *
 * Created on March 3, 2003, 10:28 PM
 */

package za.org.dragon.exodus;

import java.util.Vector;
import java.util.TreeMap;
import java.util.Collections;

import java.io.*;
import java.net.*;
import java.util.*;
import javax.swing.*;
import javax.swing.tree.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;

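/**
 * Analyses an HTTP {@link Response}, collecting the links, form actions, forms,
 * scripts and comments found in it.
 *
 * @author  rdawes
 */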
public class AnalyseResponse {

    /** The response being analysed. */
    private Response response;
    /** The URL the response was fetched from; relative links are resolved against it. */
    private URL url;
    /** Script fragments found so far (elements are {@link Fragment}); created lazily. */
    private Vector scripts = null;
    /** Comment fragments found so far (elements are {@link Fragment}); created lazily. */
    private Vector comments = null;
    /** Form fragments found so far (elements are {@link Fragment}); created lazily. */
    private Vector forms = null;
    /** Maps the raw "action" attribute text to the resolved URL; created lazily. */
    private Map formaction = null;
    /** Maps the raw link text (href, action or Location) to the resolved URL; created lazily. */
    private Map links = null;
    /** True only when the response is a 2xx response whose Content-Type contains "/html". */
    private boolean canParse = false;

    /**
     * Creates a new instance of AnalyseResponse.
     *
     * <p>The constructor only looks at the status and headers:
     * <ul>
     *   <li>2xx with an HTML content type: marks the body as parseable (see {@link #parse()});</li>
     *   <li>2xx with a "text/...script" content type: records the whole body as one script;</li>
     *   <li>301/302: records the Location header as a link.</li>
     * </ul>
     *
     * @param r   the response to analyse
     * @param url the URL the response came from, used to resolve relative links
     */
    public AnalyseResponse(Response r, URL url) {
        this.response = r;
        this.url = url;
        String status = r.getStatus();
        if (status.matches("2\\d\\d")) {
            String ct = r.getHeader("Content-Type");
            if (ct == null) {
                return;
            }
            if (ct.matches(".*/html.*")) { // the ending .* allows a character set. FIXME, do it properly
                canParse = true;
            } else if (ct.matches("text/.*script")) {
                byte[] body = r.getContent();
                if (body == null || body.length <= 0) {
                    System.out.println("Tried to parse a script file, but the content was empty!");
                    return;
                }
                addScript(new String(body));
            }
        } else if (status.equals("301") || status.equals("302")) {
            String loc = r.getHeader("Location");
            if (loc == null) {
                return;
            }
            try {
                addLink(loc, new URL(url, loc));
            } catch (MalformedURLException mue) {
                System.out.println("Can't handle an URL like " + url + " : " + loc);
            }
        }
    }

    /**
     * Parses the response body as HTML and collects links, form actions, forms,
     * scripts and comments. Does nothing unless the constructor decided the
     * response is parseable HTML, or if the response has no content.
     * Any exception raised while parsing is reported on standard output and
     * otherwise ignored, so a broken page never propagates an error to the caller.
     */
    protected void parse() {
        if (!canParse) {
            return;
        }
        byte[] body = response.getContent();
        if (body == null) {
            // nothing to parse; previously this surfaced as a logged NullPointerException
            return;
        }
        try {
            BufferedReader reader = new BufferedReader(new StringReader(new String(body)));
            new ParserDelegator().parse(reader, new CallbackHandler(), true);
        } catch (Exception e) {
            System.out.println("Exception in AnalyseResponse.parse: " + e);
            e.printStackTrace();
            System.out.println("URL was " + url + " in AnalyseResponse.parse");
        }
    }

    /**
     * Returns the resolved URLs of all links seen so far, ordered by their raw link text.
     *
     * @return the link URLs, or <code>null</code> if no link has been recorded
     */
    protected URL[] getLinks() {
        return toUrlArray(links);
    }

    /**
     * Returns the resolved URLs of all form actions seen so far, ordered by
     * their raw "action" attribute text.
     *
     * @return the form action URLs, or <code>null</code> if none has been recorded
     */
    protected URL[] getFormAction() {
        return toUrlArray(formaction);
    }

    /**
     * Returns the forms found so far, in the order they were recorded.
     *
     * @return the form fragments, or <code>null</code> if there are none
     */
    protected Fragment[] getForms() {
        return toFragmentArray(forms);
    }

    /**
     * Returns the scripts found so far, in the order they were recorded.
     *
     * @return the script fragments, or <code>null</code> if there are none
     */
    protected Fragment[] getScripts() {
        return toFragmentArray(scripts);
    }

    /**
     * Returns the comments found so far, in the order they were recorded.
     *
     * @return the comment fragments, or <code>null</code> if there are none
     */
    protected Fragment[] getComments() {
        return toFragmentArray(comments);
    }

    /** Copies the values of a link map into an array; returns null for a null map. */
    private static URL[] toUrlArray(Map map) {
        if (map == null) {
            return null;
        }
        // values() of the TreeMap iterates in key order, exactly like walking keySet()
        return (URL[]) map.values().toArray(new URL[0]);
    }

    /** Copies a Vector of Fragments into an array; returns null for a null or empty Vector. */
    private static Fragment[] toFragmentArray(Vector fragments) {
        if (fragments == null || fragments.isEmpty()) {
            return null;
        }
        return (Fragment[]) fragments.toArray(new Fragment[0]);
    }

    /** Records a link under its raw text, keeping the first URL seen for that text. */
    private void addLink(String key, URL target) {
        if (links == null) {
            links = Collections.synchronizedMap(new TreeMap());
        }
        if (!links.containsKey(key)) {
            links.put(key, target);
        }
    }

    /** Records a form action under its raw text, keeping the first URL seen for that text. */
    private void addFormAction(String key, URL target) {
        if (formaction == null) {
            formaction = Collections.synchronizedMap(new TreeMap());
        }
        if (!formaction.containsKey(key)) {
            formaction.put(key, target);
        }
    }

    /** Appends a script fragment with the given text to the list of scripts. */
    private void addScript(String text) {
        if (scripts == null) {
            scripts = new Vector(1);
        }
        scripts.add(new Fragment(text));
    }


    /** FIXME This is HORRIBLY BROKEN!!!
     *  The callbacks happen out of sequence if there is any funny business with the HTML, and e.g. FORMS
     *  do not get the right tags inside the form, but get them added after the form has been "added"
     *  to the list. e.g. see http://www.absa.co.za/ :-(
     *
     *  I think it happens with scripts as well, if they are not commented out with <!-- --> comment tags :-(
     *
     *  We probably need to implement a "dumb" parser, that knows nothing about structure, and nesting
     *  etc, but just reports tags as it sees them, without trying to get the ordering right.
     */

    /**
     * Receives the parser callbacks and rebuilds textual forms, scripts and
     * comments from them, recording links and form actions on the way.
     */
    private class CallbackHandler extends HTMLEditorKit.ParserCallback {
        private boolean formtag = false;    // indicates that the current tag is a form tag
        private boolean scripttag = false;  // indicates that we are reading a script
        private boolean styletag = false;   // indicates that we are reading a stylesheet
        private String script = "";         // contains the constructed script
        private String form = "";           // contains the constructed form

        /**
         * Creates the CallbackHandler.
         */
        public CallbackHandler() {
        }

        //
        // HTMLEditorKit.ParserCallback methods
        //

        /**
         * Invoked for text content; the text is kept only while a script is being read.
         */
        public void handleText(char[] data, int pos) {
            if (scripttag) {
                script = script + new String(data);
            }
        }

        /**
         * Invoked when a start tag is encountered.
         */
        public void handleStartTag(HTML.Tag t, MutableAttributeSet a,
        int pos) {
            formtag = false;
            scripttag = false;
            if (t == HTML.Tag.FORM) {
                // a new form starts: discard whatever was collected before
                formtag = true;
                form = "";
            } else if (isFormField(t)) {
                formtag = true;
            } else if (t == HTML.Tag.SCRIPT) {
                scripttag = true;
            } else if (t == HTML.Tag.STYLE) {
                styletag = true;
            }

            // we make a string representation of this tag
            String tag = renderTag(t, a);
            if (formtag) {
                form = form + tag + "\n";
            }
            if (scripttag) {
                if (t != HTML.Tag.SCRIPT) {
                    // This is just a tag that has a script within it, likely as an event handler
                    // save it and move on
                    addScript(tag);
                    script = "";
                    scripttag = false;
                } else {
                    // a real script element: its text arrives in later callbacks
                    script = tag + "\n";
                }
            }
        }

        /**
         * Invoked when a tag without a separate end tag is encountered.
         */
        public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            formtag = isFormField(t);
            String tag = renderTag(t, a);
            if (formtag) {
                form = form + tag + "\n";
            }
            if (scripttag) {
                // This is just a tag that has a script within it, likely as an event handler
                // save it and move on
                addScript(tag);
                script = "";
                scripttag = false;
            }
        }

        /**
         * Invoked when the end of a tag is encountered.
         */
        public void handleEndTag(HTML.Tag t, int pos) {
            String tag = "</" + t.toString() + ">";
            if (isFormField(t)) {
                form = form + tag + "\n";
            } else if (t == HTML.Tag.FORM) {
                form = form + tag;
                if (forms == null) {
                    forms = new Vector(1);
                }
                forms.add(new Fragment(form));
            } else if (t == HTML.Tag.SCRIPT) {
                addScript(script + tag);
                script = "";
                scripttag = false;
            } else if (t == HTML.Tag.STYLE) {
                styletag = false;
            }
        }

        /**
         * Invoked when a comment is encountered. Comments inside a script are
         * appended to the script, comments inside a style sheet are dropped,
         * and all others are recorded as comment fragments.
         */
        public void handleComment(char[] data, int pos) {
            String text = "<!--" + new String(data) + "-->";
            if (scripttag) {
                script = script + text + "\n";
            } else if (styletag) {
                // ignore it - styles are often put in a comment, but there is nothing interesting in a style sheet, is there?
            } else {
                if (comments == null) {
                    comments = new Vector(1);
                }
                comments.add(new Fragment(text));
            }
        }

        //
        // helpers shared by the start tag and simple tag callbacks
        //

        /** Returns true for the tags that are treated as fields of a form. */
        private boolean isFormField(HTML.Tag t) {
            return t == HTML.Tag.INPUT
                || t == HTML.Tag.SELECT
                || t == HTML.Tag.OPTION
                || t == HTML.Tag.TEXTAREA;
        }

        /**
         * Builds the textual form of a tag with all of its attributes. As a side
         * effect, href/action targets are recorded and <code>scripttag</code> is
         * set when the tag carries a script (a "script:" target or an on...
         * event handler attribute).
         */
        private String renderTag(HTML.Tag t, MutableAttributeSet a) {
            StringBuffer tag = new StringBuffer("<").append(t.toString());
            Enumeration names = a.getAttributeNames();
            while (names.hasMoreElements()) {
                Object name = names.nextElement();
                String attr = name.toString();
                // attribute values are not always Strings (the parser marks
                // implied tags with a Boolean), so never cast
                String value = String.valueOf(a.getAttribute(name));

                // fall back to single quotes when the value itself contains a double quote
                String quot = (value.indexOf("\"") > -1) ? "'" : "\"";
                tag.append(" ").append(attr).append("=").append(quot).append(value).append(quot);

                if (attr.equalsIgnoreCase("href") || (formtag && attr.equalsIgnoreCase("action"))) {
                    recordTarget(attr, value);
                } else if (attr.length() > 2 && attr.substring(0, 2).equalsIgnoreCase("on")) {
                    // the tag includes an on... event handler, which must be a script
                    scripttag = true;
                }
            }
            return tag.append(">").toString();
        }

        /**
         * Handles the value of an href or action attribute: script targets flag
         * the tag as a script, mailto targets are skipped, and everything else
         * is resolved against the page URL and recorded as a link (and as a
         * form action when the attribute is "action").
         */
        private void recordTarget(String attr, String value) {
            if (value.indexOf("script:") > -1) {
                // the href is a script
                scripttag = true;
                return;
            }
            if (value.trim().regionMatches(true, 0, "mailto:", 0, 7)) {
                // mail addresses are not links we can follow
                return;
            }
            try {
                URL u;
                if (value.startsWith("?")) {
                    u = new URL(url.toString() + value);
                } else {
                    u = new URL(url, value);
                }
                addLink(value, u);
                if (attr.equalsIgnoreCase("action")) {
                    addFormAction(value, u);
                }
            } catch (MalformedURLException mue) {
                System.out.println("Can't handle an URL like " + url + " : " + value);
            }
        }

    }

    /**
     * Manual test driver: reads a stored conversation from a hard-coded
     * location, analyses its response and prints everything that was found.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        BackingStore bs = new BackingStore("D:/temp/absa/");
        Conversation c = bs.readConversation("00036");
        AnalyseResponse ar = new AnalyseResponse(c.getServerResponse(),c.getClientRequest().getURL());
        ar.parse();
        URL[] urls = ar.getLinks();
        if (urls != null) {
            for (int i=0; i<urls.length; i++) {
                System.out.println("LINK : " + urls[i].toString());
            }
        }
        urls = ar.getFormAction();
        if (urls != null) {
            for (int i=0; i<urls.length; i++) {
                System.out.println("FORM Actions : " + urls[i].toString());
            }
        }
        Fragment[] forms = ar.getForms();
        if (forms != null) {
            for (int i=0; i<forms.length; i++) {
                System.out.println("Form : " + forms[i]);
                System.out.println("Form : " + forms[i].getBody());
            }
        }
        Fragment[] scripts = ar.getScripts();
        if (scripts != null) {
            System.out.println("Got Scripts : " + scripts.length);
            for (int i=0; i<scripts.length; i++) {
                System.out.println("Script : " + scripts[i].getBody());
            }
        }

        Fragment[] comments = ar.getComments();
        if (comments != null) {
            System.out.println("Got Comments : " + comments.length);
            for (int i=0; i<comments.length; i++) {
                System.out.println("Comment : " + comments[i].getBody());
            }
        }
    }

}
