/*
 * Copyright (C) 2003-2004 Red Hat Inc. All Rights Reserved.
 *
 * The contents of this file are subject to the CCM Public
 * License (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the
 * License at http://www.redhat.com/licenses/ccmpl.html.
 *
 * Software distributed under the License is distributed on an
 * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
 * or implied. See the License for the specific language
 * governing rights and limitations under the License.
 *
 */
package com.arsdigita.search.lucene;

import com.arsdigita.persistence.DataCollection;
import com.arsdigita.persistence.DedicatedConnectionSource;
import com.arsdigita.persistence.Filter;
import com.arsdigita.persistence.Session;
import com.arsdigita.persistence.SessionManager;
import com.arsdigita.persistence.TransactionContext;
import com.arsdigita.persistence.metadata.MetadataRoot;
import com.arsdigita.runtime.RuntimeConfig;
import com.arsdigita.util.jdbc.Connections;

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.util.Date;
import java.util.Locale;
import java.util.TimerTask;

import org.apache.log4j.Logger;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

/**
 * Timer task that brings the on-disk Lucene index in line with the
 * search documents that are currently flagged as dirty.
 *
 * <p>Each run opens its own persistence transaction, walks every dirty
 * {@link Document}, removes its old entry from the Lucene index and, unless
 * the document is marked as deleted, adds a fresh entry. Every access to the
 * Lucene index is serialized on the shared {@link LuceneLock}.</p>
 *
 * @author <a href="mailto:rhs@mit.edu">rhs@mit.edu</a>
 * @version $Revision: #8 $ $Date: 2004/04/07 $
 **/

class Indexer extends TimerTask {

    public static final String versionId =
        "$Id: //core-platform/dev/src/com/arsdigita/search/lucene/Indexer.java#8 $" +
        " by $Author: dennis $, $DateTime: 2004/04/07 16:07:11 $";

    private static final Logger LOG =
        Logger.getLogger(Indexer.class);

    // NOTE(review): not referenced anywhere in this class; kept because
    // removing it would change what happens at class-initialization time.
    private static final RuntimeConfig CONF = RuntimeConfig.getConfig();

    // All reads and writes to the index must be synchronized against this
    // lock.
    private static final LuceneLock LOCK = LuceneLock.getInstance();

    // Location of the Lucene index this indexer maintains.
    private final File m_index;

    /**
     * Creates an indexer for the Lucene index stored at the given path.
     *
     * @param index file system path of the Lucene index directory
     */
    public Indexer(String index) {
        m_index = new File(index);
    }

    /**
     * Runs one synchronization pass inside a dedicated transaction.
     *
     * <p>An {@link IOException} raised while talking to the Lucene index is
     * logged and the transaction is still committed, so documents that were
     * indexed successfully before the failure keep their cleared dirty flag.
     * Any other error is logged and the transaction is rolled back; the
     * affected documents stay dirty and are picked up again by the next
     * run.</p>
     *
     * @throws IllegalStateException if the current session is already inside
     *         a transaction
     */
    public void run() {
        Session ssn = SessionManager.getSession();
        TransactionContext txn = ssn.getTransactionContext();

        if (txn.inTxn()) {
            throw new IllegalStateException("The lucene indexer must be run " +
                                            "from its own transaction.");
        }

        txn.beginTxn();

        // Becomes true only when the pass finished without an unexpected
        // error; decides between commit and rollback below.
        boolean completed = false;
        try {
            if (LOG.isInfoEnabled()) {
                LOG.info("Lucene indexer running for '" + m_index + "'.");
            }
            try {
                sync();
            } catch (IOException ioe) {
                // Index I/O failed part way through. The document being
                // processed was not marked clean, so committing what has
                // been done so far is safe.
                LOG.error("couldn't sync", ioe);
            }
            if (LOG.isInfoEnabled()) {
                LOG.info("Lucene indexer finished running for '" + m_index +
                         "'.");
            }
            completed = true;
        } catch(Throwable t) {
            LOG.error("Unexpected error occured in Lucene indexer.", t);
        } finally {
            if (completed) {
                txn.commitTxn();
            } else {
                // Do not persist state left behind by an unexpected failure.
                // Re-indexing a still-dirty document later is harmless
                // because every update starts with a delete.
                txn.abortTxn();
            }
        }
    }

    /**
     * Re-indexes every document that is flagged dirty for this index.
     *
     * <p>Must be called from within a transaction; {@link #run()} takes care
     * of that.</p>
     *
     * @throws IOException if the Lucene index cannot be read or written
     */
    void sync() throws IOException {
        LOG.debug("syncing");
        Session ssn = SessionManager.getSession();
        DataCollection dc = ssn.retrieve(Document.BASE_DATA_OBJECT_TYPE);
        Filter filter =
            dc.addInSubqueryFilter("id", "com.arsdigita.search.lucene.dirtyDocuments");
        // The bind value has a single bit set, at the position given by this
        // index's ID (presumably the dirty flag is a per-index bit mask).
        filter.set(Document.DIRTY, new Integer(1 << Index.getIndexID()));

        try {
            while (dc.next()) {
                Document doc = Document.retrieve(dc.getDataObject());
                // the only way to update a lucene document is to first delete
                // it. See
                // http://lucene.sourceforge.net/cgi-bin/faq/faqmanager.cgi?file=chapter.indexing&toc=faq#q4
                delete(doc);
                if (!doc.isDeleted()) {
                    update(doc);
                }
                // XXX for deleted documents we can't simply delete the doc
                // row (doc.delete()) because multiple JVMs need to delete
                // the content. We need some other way to periodically
                // cleanup docs.
                doc.setDirty(false);
                doc.save();
            }
        } finally {
            dc.close();
        }
    }

    /**
     * Removes every Lucene entry whose ID term matches the given document.
     *
     * @param doc the document whose index entries are removed
     * @throws IOException if the index cannot be opened or modified
     */
    private void delete(Document doc) throws IOException  {
        synchronized (LOCK) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Deleting document (" + doc.getID() + "): " +
                              doc.getTitle());
            }
            IndexReader ir = IndexReader.open(m_index);
            try {
                ir.delete(new Term(Document.ID, doc.getID().toString()));
            } finally {
                ir.close();
            }
        }
    }

    /**
     * Adds a Lucene entry for the given document to the existing index.
     *
     * @param doc the document to index
     * @throws IOException if the index cannot be opened or written
     */
    private void update(Document doc) throws IOException {
        synchronized (LOCK) {
            // 'false': append to the existing index instead of creating a
            // new one.
            IndexWriter iw = new IndexWriter(m_index, new StandardAnalyzer(),
                                             false);
            try {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Indexing document (" + doc.getID() + "): " +
                              doc.getTitle());
                }
                iw.addDocument(getDocument(doc));
            } finally {
                iw.close();
            }
        }
    }

    /**
     * Builds the Lucene representation of a search document.
     *
     * <p>Every value except the ID is passed through a null-safe conversion,
     * so a missing value is stored as the empty string.</p>
     *
     * @param doc the search document to convert
     * @return a new Lucene document carrying the fields of <code>doc</code>
     */
    private org.apache.lucene.document.Document getDocument(Document doc) {
        org.apache.lucene.document.Document result =
            new org.apache.lucene.document.Document();

        result.add(Field.Keyword(Document.ID, doc.getID().toString()));

        // A document without a locale is indexed with empty language and
        // country fields.
        String language = "";
        String country = "";
        Locale locale = doc.getLocale();
        if (locale != null) {
            language = locale.getLanguage();
            country = locale.getCountry();
        }
        result.add(Field.Keyword(Document.LANGUAGE, language));
        result.add(Field.Keyword(Document.COUNTRY, country));

        // Type and title go through toString() like the other fields so a
        // missing value does not abort the whole indexing pass.
        result.add(Field.Keyword(Document.TYPE, toString(doc.getType())));
        result.add(Field.Keyword(Document.TYPE_SPECIFIC_INFO,
                                 toString(doc.getTypeSpecificInfo())));
        result.add(Field.Keyword(Document.TITLE, toString(doc.getTitle())));
        result.add(Field.Keyword(Document.SUMMARY,
                                 toString(doc.getSummary())));
        // The content is the only tokenized (full-text) field.
        result.add(Field.Text(Document.CONTENT, toString(doc.getContent())));
        result.add(Field.Keyword(Document.CREATION_DATE,
                                 toString(doc.getCreationDate())));
        result.add(Field.Keyword(Document.CREATION_PARTY,
                                 toString(doc.getCreationParty())));
        result.add(Field.Keyword(Document.LAST_MODIFIED_DATE,
                                 toString(doc.getLastModifiedDate())));
        result.add(Field.Keyword(Document.LAST_MODIFIED_PARTY,
                                 toString(doc.getLastModifiedParty())));

        return result;
    }

    /**
     * Converts a date to Lucene's string form for dates.
     *
     * @param date the date to convert, may be <code>null</code>
     * @return the encoded date, or the empty string for <code>null</code>
     */
    private static String toString(Date date) {
        return date == null ? "" : DateField.dateToString(date);
    }

    /**
     * Null-safe variant of {@link Object#toString()}.
     *
     * @param obj the value to convert, may be <code>null</code>
     * @return <code>obj.toString()</code>, or the empty string for
     *         <code>null</code>
     */
    private static String toString(Object obj) {
        return obj == null ? "" : obj.toString();
    }

}
