package com.manticore.etl;

import com.manticore.etl.ETLSource;
import com.manticore.http.HttpClientFactory;
import com.manticore.util.ThreadList;
import com.manticore.util.XMLTools;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.xml.sax.SAXException;

/* loaded from: input_file:com/manticore/etl/HTMLSource.class */
/**
 * ETL source that scrapes rows out of HTML pages.
 *
 * <p>{@link #execute()} downloads the index page at {@link #url}, selects nodes with
 * {@link #xpath}, optionally narrows each node text with {@link #regex}, and substitutes the
 * result for <code>${id}</code> in the {@link #page} template to obtain one detail-page URL per
 * node. A background thread then reads every detail page ({@link #readPages()}) and publishes
 * one row per page, with one column per configured field, on a bounded queue that
 * {@link #getData()} drains.
 *
 * <p>Typical consumer loop: {@code while (src.hasMoreData()) { Object[] row = src.getData(); }}.
 * {@link #getData()} may return {@code null} (end of stream or interruption), so the consumer
 * must tolerate that.
 *
 * <p>NOTE(review): the single HTTP client obtained from {@code HttpClientFactory} is shared by
 * all page-reader threads; this presumably relies on the factory returning a client that is
 * safe for concurrent use - confirm.
 */
public class HTMLSource extends ETLSource {
    /** When {@code true}, every parsed page is additionally handed to {@code XMLTools.writeTempXMLFile}. */
    public static boolean writeTmpFiles = true;

    private static final Logger LOGGER = Logger.getLogger(HTMLSource.class.getName());

    /** Flags used for every regex in this class (numerically 104, the value the code used before). */
    private static final int REGEX_FLAGS = Pattern.MULTILINE | Pattern.DOTALL | Pattern.UNICODE_CASE;

    /** How long {@link #getData()} waits for a row before re-checking for end of stream. */
    private static final long POLL_INTERVAL_MS = 100L;

    private final DefaultHttpClient client;
    private ArrayList<String> pageUrlList;
    private LinkedBlockingQueue<Object[]> data;
    /** Written by the background reader thread and read by the consumer, hence volatile. */
    private volatile boolean moreData;
    public String url;
    public String xpath;
    public String regex;
    public String page;
    public HashMap<String, HTMLSourceField> fields;

    /**
     * Describes one output column: where to find it on a detail page and how to narrow it.
     * Only {@code name}, {@code xpath} and {@code regex} are interpreted by this class;
     * {@code type} and {@code format} are merely stored and persisted.
     */
    public class HTMLSourceField {
        public String name;
        public String xpath;
        public String regex;
        public int type;
        public String format;

        public HTMLSourceField(String name, String xpath, String regex, int type, String format) {
            this.name = name;
            this.xpath = xpath;
            this.regex = regex;
            this.type = type;
            this.format = format;
        }
    }

    /**
     * Reads every collected detail page on its own thread and blocks until all of them are done.
     *
     * <p>The {@code moreData} flag is cleared in a {@code finally} block so that a failure in
     * here can never leave consumers waiting for rows that will not arrive.
     */
    public void readPages() {
        try {
            ThreadList threadList = new ThreadList();
            for (final String pageUrl : this.pageUrlList) {
                this.moreData = true;
                Runnable pageReader = new Runnable() {
                    @Override
                    public void run() {
                        readPage(pageUrl);
                    }
                };
                if (this.data != null) {
                    threadList.add(new Thread(pageReader));
                }
            }
            threadList.join();
            Logger.getAnonymousLogger().info("close when all threads done");
        } finally {
            this.moreData = false;
        }
    }

    /**
     * Downloads one detail page, extracts a value per configured field and queues the row.
     * A row is queued even when the download or parsing fails with a checked exception; the
     * columns that could not be filled stay {@code null}.
     */
    private void readPage(String pageUrl) {
        LinkedBlockingQueue<Object[]> queue = this.data;
        if (queue == null) {
            return;
        }
        Object[] row = new Object[this.fields.size()];
        try {
            Logger.getAnonymousLogger().info("read " + pageUrl);
            HttpGet request = new HttpGet(pageUrl);
            try {
                Document document = XMLTools.parseHtml(this.client.execute(request).getEntity().getContent());
                if (writeTmpFiles) {
                    XMLTools.writeTempXMLFile(document);
                }
                int column = 0;
                for (HTMLSourceField field : this.fields.values()) {
                    Node node = document.selectSingleNode(field.xpath);
                    if (node != null) {
                        String text = node.getText().trim();
                        // A missing regex (null, e.g. attribute absent in the config) means "no regex".
                        if (!text.isEmpty() && field.regex != null && !field.regex.isEmpty()) {
                            text = firstMatch(text, Pattern.compile(field.regex, REGEX_FLAGS));
                        }
                        row[column] = text;
                    }
                    column++;
                }
            } finally {
                // Release the connection on failure paths as well, not only after a clean parse.
                request.abort();
            }
        } catch (IOException | SAXException | DocumentException e) {
            LOGGER.log(Level.SEVERE, "Failed to read page " + pageUrl, e);
        }
        try {
            Logger.getAnonymousLogger().info("write " + HTMLSource.objectsToString(row));
            queue.put(row);
        } catch (InterruptedException e) {
            LOGGER.log(Level.SEVERE, "Interrupted while queueing row for " + pageUrl, e);
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Returns the first match of {@code pattern} in {@code text}, or {@code text} unchanged when
     * the text is empty, the pattern is {@code null}, or nothing matches.
     */
    private static String firstMatch(String text, Pattern pattern) {
        if (pattern == null || text.isEmpty()) {
            return text;
        }
        Matcher matcher = pattern.matcher(text);
        return matcher.find() ? matcher.group() : text;
    }

    @Override // com.manticore.etl.ETLSource
    public ETLSource.SourceField[] getFields() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Loads the source settings and field definitions from a configuration element.
     * Every child element is treated as a field definition keyed by its {@code id} attribute.
     *
     * @throws NumberFormatException if a child element has a missing or non-numeric {@code type}
     */
    @Override // com.manticore.etl.ETLSource
    public void read(Element element) {
        this.url = element.attributeValue("url");
        this.xpath = element.attributeValue("xpath");
        this.regex = element.attributeValue("regex");
        this.page = element.attributeValue("page");
        // Iterated as Object so this compiles whether elements() is declared raw or generic.
        for (Object child : element.elements()) {
            Element fieldElement = (Element) child;
            String id = fieldElement.attributeValue("id");
            int type = Integer.parseInt(fieldElement.attributeValue("type"));
            this.fields.put(id, new HTMLSourceField(
                    id,
                    fieldElement.attributeValue("xpath"),
                    fieldElement.attributeValue("regex"),
                    type,
                    fieldElement.attributeValue("format")));
        }
    }

    /**
     * Tells whether {@link #getData()} can still deliver rows: either the background reader is
     * still running, or rows it produced are still waiting in the queue. Checking the queue as
     * well prevents rows from being dropped when the reader finishes before the consumer has
     * drained them.
     */
    @Override // com.manticore.etl.ETLSource
    public boolean hasMoreData() {
        if (this.moreData) {
            return true;
        }
        LinkedBlockingQueue<Object[]> queue = this.data;
        return queue != null && !queue.isEmpty();
    }

    public HTMLSource(String url, String xpath, String regex, String page) {
        this.client = HttpClientFactory.getClient();
        this.pageUrlList = new ArrayList<>();
        this.moreData = false;
        this.url = url;
        this.xpath = xpath;
        this.regex = regex;
        this.page = page;
        this.fields = new HashMap<>();
    }

    public HTMLSource() {
        this("", "", "", "");
    }

    /** Registers (or replaces) the field definition stored under {@code name}. */
    public void put(String name, String xpath, String regex, int type, String format) {
        this.fields.put(name, new HTMLSourceField(name, xpath, regex, type, format));
    }

    /**
     * Replaces the {@code source} child of {@code element} with a fresh one describing this
     * source and its fields.
     *
     * @return the newly created {@code source} element
     */
    @Override // com.manticore.etl.ETLSource
    public Element save(Element element) {
        Element previous = element.element("source");
        if (previous != null) {
            element.remove(previous);
        }
        Element source = element.addElement("source");
        source.addAttribute("mode", "html");
        source.addAttribute("url", this.url);
        source.addAttribute("xpath", this.xpath);
        source.addAttribute("regex", this.regex);
        source.addAttribute("page", this.page);
        for (HTMLSourceField field : this.fields.values()) {
            // Reuse an element already written for the same name so duplicates collapse into one.
            Element fieldElement = (Element) source.selectSingleNode("field[@id='" + field.name + "']");
            if (fieldElement == null) {
                fieldElement = source.addElement("field");
            }
            fieldElement.addAttribute("id", field.name);
            fieldElement.addAttribute("xpath", field.xpath);
            fieldElement.addAttribute("regex", field.regex);
            fieldElement.addAttribute("type", String.valueOf(field.type));
            fieldElement.addAttribute("format", field.format);
        }
        return source;
    }

    /**
     * Downloads the index page, derives the list of detail-page URLs and starts the background
     * thread that reads them. Failures while fetching or parsing the index page are logged and
     * leave the source without a running reader.
     *
     * <p>The URL list is rebuilt on every call, so running the source again does not re-read the
     * pages of an earlier execution.
     */
    @Override // com.manticore.etl.ETLSource
    public void execute() {
        Pattern idPattern = null;
        if (this.regex != null && !this.regex.isEmpty()) {
            idPattern = Pattern.compile(this.regex, REGEX_FLAGS);
        }
        try {
            ArrayList<String> urls = new ArrayList<>();
            HttpGet request = new HttpGet(this.url);
            try {
                Document document = XMLTools.parseHtml(this.client.execute(request).getEntity().getContent());
                if (writeTmpFiles) {
                    XMLTools.writeTempXMLFile(document);
                }
                // Iterated as Object so this compiles whether selectNodes() is declared raw or generic.
                for (Object item : document.selectNodes(this.xpath)) {
                    String id = firstMatch(((Node) item).getText().trim(), idPattern);
                    urls.add(this.page.replace("${id}", id));
                }
            } finally {
                // Release the connection on failure paths as well, not only after a clean parse.
                request.abort();
            }
            this.pageUrlList = urls;
            this.data = new LinkedBlockingQueue<>(10);
            this.moreData = true;
            new Thread(new Runnable() {
                @Override
                public void run() {
                    HTMLSource.this.readPages();
                }
            }).start();
        } catch (IllegalStateException | DocumentException | IOException | SAXException e) {
            LOGGER.log(Level.SEVERE, "Failed to read index page " + this.url, e);
        }
    }

    /**
     * Returns the next row, waiting for one while the background reader is still producing.
     *
     * @return the next row, or {@code null} when no further row will arrive (the reader has
     *         finished and the queue is empty, or the source was never executed) or when the
     *         calling thread is interrupted while waiting
     */
    @Override // com.manticore.etl.ETLSource
    public Object[] getData() {
        Object[] row = null;
        LinkedBlockingQueue<Object[]> queue = this.data;
        if (queue == null) {
            return null;
        }
        try {
            // Poll with a timeout instead of blocking indefinitely: the reader may finish without
            // producing another row after the caller last saw hasMoreData() == true.
            while (row == null && hasMoreData()) {
                row = queue.poll(POLL_INTERVAL_MS, TimeUnit.MILLISECONDS);
            }
            if (row != null) {
                Logger.getAnonymousLogger().info("read " + objectsToString(row));
            }
        } catch (InterruptedException e) {
            LOGGER.log(Level.SEVERE, "Interrupted while waiting for data", e);
            Thread.currentThread().interrupt();
        }
        return row;
    }

    /** Renders each element followed by {@code ";"}, e.g. {@code ["a", null]} becomes {@code "a;null;"}. */
    public static String objectsToString(Object[] objArr) {
        StringBuilder sb = new StringBuilder();
        for (Object obj : objArr) {
            sb.append(obj).append(";");
        }
        return sb.toString();
    }
}
