[Yanel-commits] rev 23696 - public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources

michi at wyona.com michi at wyona.com
Thu Apr 12 16:31:11 CEST 2007


Author: michi
Date: 2007-04-12 16:31:09 +0200 (Thu, 12 Apr 2007)
New Revision: 23696

Modified:
   public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource2.java
Log:
import site added

Modified: public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource2.java
===================================================================
--- public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource2.java	2007-04-12 14:22:38 UTC (rev 23695)
+++ public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource2.java	2007-04-12 14:31:09 UTC (rev 23696)
@@ -16,12 +16,14 @@
 
 package org.wyona.yanel.impl.resources;
 
-import java.util.Calendar;
-import java.util.HashMap;
 import java.io.File;
 import java.io.StringBufferInputStream;
 import java.io.ByteArrayOutputStream;
+import java.net.URL;
+import java.util.Calendar;
+import java.util.HashMap;
 
+import javax.servlet.http.HttpSession;
 import javax.servlet.http.HttpServletRequest;
 
 import javax.xml.parsers.DocumentBuilder;
@@ -36,12 +38,16 @@
 import org.w3c.dom.Element;
 
 import org.apache.log4j.Category;
+import org.apache.lenya.search.crawler.DumpingCrawler;
 
+import websphinx.DownloadParameters;
+
 import org.wyona.yanel.core.Path;
 import org.wyona.yanel.core.Resource;
 import org.wyona.yanel.core.api.attributes.ViewableV1;
 import org.wyona.yanel.core.attributes.viewable.View;
 import org.wyona.yanel.core.attributes.viewable.ViewDescriptor;
+import org.wyona.yanel.core.map.Realm;
 
 /**
  * 
@@ -50,6 +56,11 @@
 
     private static Category log = Category.getInstance(AddRealmResource2.class);
 
+    private final static String SESSION_ATTR_EVENT_LOG = "org.wyona.yanel.addrealm.eventlog";
+    private final static String SESSION_ATTR_CRAWLER = "org.wyona.yanel.addrealm.crawler";
+    private final static String SESSION_ATTR_REALM_ID = "org.wyona.yanel.addrealm.realm.id";
+    private final static String SESSION_ATTR_REALM_NAME = "org.wyona.yanel.addrealm.realm.name";
+
     String NAMESPACE = "http://www.wyona.org/yanel/1.0";
 
     /**
@@ -273,9 +284,9 @@
         }
 
         if (valid && request.getParameter("confirm") != null && request.getParameter("confirm").equals("true")) {
-            fromScratchElement.appendChild(doc.createElementNS(NAMESPACE, "realm-created"));
             try {
                 getYanel().getRealmConfiguration().copyRealm("from-scratch-realm-template", realmidip.getValue(), realmnameip.getValue(), "/" + realmidip.getValue() + "/", new File(fsLocationValue));
+                fromScratchElement.appendChild(doc.createElementNS(NAMESPACE, "realm-created"));
             } catch (Exception e) {
                 log.error(e.getMessage(), e);
                 fromScratchElement.appendChild(doc.createElementNS(NAMESPACE, "exception"));
@@ -389,9 +400,13 @@
         }
 
         if (valid && request.getParameter("confirm") != null && request.getParameter("confirm").equals("true")) {
-            fromExistingWebsiteElement.appendChild(doc.createElementNS(NAMESPACE, "realm-created"));
+
             try {
                 getYanel().getRealmConfiguration().copyRealm("from-scratch-realm-template", realmidip.getValue(), realmnameip.getValue(), "/" + realmidip.getValue() + "/", new File(fsLocationValue));
+                fromExistingWebsiteElement.appendChild(doc.createElementNS(NAMESPACE, "realm-created"));
+
+                importSite(urlip.getValue(), scopeip.getValue(), new Integer(cmpip.getValue()).intValue(), new Integer(cdip.getValue()).intValue(), new Integer(cmsip.getValue()).intValue(), realmidip.getValue());
+                fromExistingWebsiteElement.appendChild(doc.createElementNS(NAMESPACE, "crawler-running"));
             } catch (Exception e) {
                 log.error(e.getMessage(), e);
                 fromExistingWebsiteElement.appendChild(doc.createElementNS(NAMESPACE, "exception"));
@@ -455,4 +470,55 @@
         if (!new File(value).isDirectory()) return false;
         return true;
     }
+    
+    /**
+     * Crawls an external site and starts importing it into a realm.
+     * <p>
+     * The method configures a DumpingCrawler, registers a new EventLog as both link
+     * listener and crawl listener, stores the event log, the crawler, the realm ID and
+     * the realm name as attributes of the HTTP session of the current request, and then
+     * starts an ImportSiteThread. It returns right after starting that thread and does
+     * not wait for the crawl to finish.
+     * <p>
+     * The crawler dumps into a directory named "import_" followed by the current time in
+     * milliseconds, located below the directory given by the system property
+     * java.io.tmpdir.
+     * <p>
+     * NOTE(review): the realm returned by the realm configuration is dereferenced without
+     * a null check; how an unknown realm ID is reported is not visible here - confirm.
+     *
+     * @param crawlStartURL URL at which the crawl starts; also parsed as a java.net.URL
+     *        when no scope is given
+     * @param crawlScopeURL comma-separated list of scope urls. If null or empty, a single
+     *        scope is derived from crawlStartURL: when the path of the start URL is not
+     *        empty, does not end with "/" and contains a "/", the scope is the start URL
+     *        cut off before its last "/"; otherwise the scope is the start URL itself
+     * @param maxPages value passed to the crawler's setMaxPages
+     * @param maxDepth value passed to the crawler's setMaxDepth
+     * @param maxPageSize value applied to the crawler's download parameters through
+     *        changeMaxPageSize (unit as defined by websphinx DownloadParameters; not
+     *        visible here)
+     * @param realmID ID used to look up the target realm in the realm configuration
+     * @throws Exception if the scope has to be derived and crawlStartURL is not a valid
+     *         URL, or if any of the called crawler, realm or session operations fails
+     */
+    protected void importSite(String crawlStartURL, String crawlScopeURL, int maxPages, int maxDepth, int maxPageSize, String realmID) throws Exception {
+        String[] crawlScopeURLs = null;
+        if (crawlScopeURL == null || crawlScopeURL.length() == 0) {
+            String path = new URL(crawlStartURL).getPath();
+            crawlScopeURLs = new String[1];
+            if (path.length() != 0 && !path.endsWith("/") && path.indexOf("/") > -1) {
+                crawlScopeURLs[0] = crawlStartURL.substring(0, crawlStartURL.lastIndexOf("/"));
+            } else {
+                crawlScopeURLs[0] = crawlStartURL;
+            }
+        } else {
+            crawlScopeURLs = crawlScopeURL.split(",");
+        }
+        
+        String dumpDir = System.getProperty("java.io.tmpdir") + File.separator + "import_" + System.currentTimeMillis();
+        DumpingCrawler crawler = new DumpingCrawler(crawlStartURL, crawlScopeURLs, dumpDir);
+        crawler.setMaxPages(maxPages);
+        crawler.setMaxDepth(maxDepth);
+        
+        DownloadParameters downloadParams = new DownloadParameters();
+        downloadParams = downloadParams.changeMaxPageSize(maxPageSize);
+        crawler.setDownloadParameters(downloadParams);
+        
+        
+        EventLog eventLog = new EventLog();
+        crawler.addLinkListener(eventLog);
+        crawler.addCrawlListener(eventLog);
+        
+        Realm realm = getYanel().getRealmConfiguration().getRealm(realmID);
+       
+        HttpSession session = getRequest().getSession(true); 
+        session.setAttribute(SESSION_ATTR_EVENT_LOG, eventLog);
+        session.setAttribute(SESSION_ATTR_CRAWLER, crawler);
+        session.setAttribute(SESSION_ATTR_REALM_ID, realm.getID());
+        session.setAttribute(SESSION_ATTR_REALM_NAME, realm.getName());
+        
+        // start crawler in new thread to be able to show progress:
+        ImportSiteThread thread = new ImportSiteThread(crawler, realm, dumpDir, crawlStartURL, 
+                crawlScopeURLs, eventLog);
+        thread.start();
+    }
 }




More information about the Yanel-commits mailing list