[Yanel-commits] rev 23272 - in public/yanel/trunk/src/resources/add-realm: src/java/org/wyona/yanel/impl/resources xslt

michi at wyona.com michi at wyona.com
Thu Mar 15 12:02:28 CET 2007


Author: michi
Date: 2007-03-15 12:02:26 +0100 (Thu, 15 Mar 2007)
New Revision: 23272

Modified:
   public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource.java
   public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/EventLog.java
   public/yanel/trunk/src/resources/add-realm/xslt/add-realm.xsl
Log:
refresh screen added

Modified: public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource.java
===================================================================
--- public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource.java	2007-03-15 11:01:59 UTC (rev 23271)
+++ public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource.java	2007-03-15 11:02:26 UTC (rev 23272)
@@ -61,10 +61,10 @@
 
     private static final String CRAWLER_JAR = "yanel-crawler.jar";
     private static Category log = Category.getInstance(AddRealmResource.class);
-    private final static int INSIDE_TAG = 0;
-    private final static int OUTSIDE_TAG = 1;
     private final static String SESSION_ATTR_EVENT_LOG = "org.wyona.yanel.addrealm.eventlog";
     private final static String SESSION_ATTR_CRAWLER = "org.wyona.yanel.addrealm.crawler";
+    private final static String SESSION_ATTR_REALM_ID = "org.wyona.yanel.addrealm.realm.id";
+    private final static String SESSION_ATTR_REALM_NAME = "org.wyona.yanel.addrealm.realm.name";
     
     private String defaultLanguage = "en";
     private String language = null;
@@ -137,10 +137,12 @@
             
         	 // Check if data was sumbitted (realm ID, realm Name, URL to be dumped, depth of crawling, max number of pages)
             boolean submit = false;
+            boolean stop = false;
             Enumeration enumeration = request.getParameterNames();
             while(enumeration.hasMoreElements()){
-                if(enumeration.nextElement().toString().equals("submit")) 
-                    submit = true;
+                String param = enumeration.nextElement().toString(); 
+                if (param.equals("submit")) submit = true;
+                if (param.equals("stop")) stop = true;
             }
             
         	File XSLTFile = org.wyona.commons.io.FileUtil.file(rtd.getConfigFile().getParentFile().getAbsolutePath(), "xslt" + File.separator + "add-realm.xsl");
@@ -168,6 +170,8 @@
             Set keys = parameters.keySet();
             Iterator keysIterator = keys.iterator();
             
+            HttpSession session = getRequest().getSession(true); 
+            
             if(submit) {
             	
                 while (keysIterator.hasNext()) {
@@ -212,12 +216,12 @@
                         
                         importSite(crawlStartURL, maxPages, maxDepth, realmID);
                         
-                        HttpSession session = getRequest().getSession(true); 
                         EventLog eventLog = (EventLog)session.getAttribute(SESSION_ATTR_EVENT_LOG);
                         if (eventLog != null) {
                             transformer.setParameter("downloadevents", eventLog.getDownloadEvents());
                             transformer.setParameter("errorevents", eventLog.getErrorEvents());
                             transformer.setParameter("nofdownloads", String.valueOf(eventLog.getNofDownloads()));
+                            transformer.setParameter("isdone", String.valueOf(eventLog.isDone()));
                         }
 
                     }
@@ -225,6 +229,38 @@
                 
                 transformer.transform(new javax.xml.transform.stream.StreamSource(statusXMLFile), new StreamResult(byteArrayOutputStream));
                 
+            } else if (session.getAttribute(SESSION_ATTR_EVENT_LOG) != null) {
+                // the crawler is running
+                
+                if (stop) {
+                    DumpingCrawler crawler = (DumpingCrawler)session.getAttribute(SESSION_ATTR_CRAWLER);
+                    if (crawler != null) {
+                        crawler.stop();
+                    }
+                }
+                
+                // show progress
+                EventLog eventLog = (EventLog)session.getAttribute(SESSION_ATTR_EVENT_LOG);
+                if (eventLog != null) {
+                    transformer.setParameter("downloadevents", eventLog.getDownloadEvents());
+                    transformer.setParameter("errorevents", eventLog.getErrorEvents());
+                    transformer.setParameter("nofdownloads", String.valueOf(eventLog.getNofDownloads()));
+                    transformer.setParameter("submitted", "true");
+                    transformer.setParameter("isdone", String.valueOf(eventLog.isDone()));
+
+                    transformer.setParameter("yanel.back2context", PathUtil.backToContext(realm, getPath()));
+                    transformer.setParameter("realmid", session.getAttribute(SESSION_ATTR_REALM_ID));
+                    transformer.setParameter("realmname", session.getAttribute(SESSION_ATTR_REALM_NAME));
+
+                    if (eventLog.isDone()) {
+                        session.removeAttribute(SESSION_ATTR_EVENT_LOG);
+                        session.removeAttribute(SESSION_ATTR_CRAWLER);
+                        session.removeAttribute(SESSION_ATTR_REALM_ID);
+                        session.removeAttribute(SESSION_ATTR_REALM_NAME);
+                    }
+                }
+                transformer.transform(new javax.xml.transform.stream.StreamSource(statusXMLFile), new StreamResult(byteArrayOutputStream));
+                
             } else {
             	
                 while (keysIterator.hasNext()) {
@@ -272,227 +308,21 @@
         
         EventLog eventLog = new EventLog();
         crawler.addLinkListener(eventLog);
+        crawler.addCrawlListener(eventLog);
        
+        Realm realm = getYanel().getRealmConfiguration().getRealm(realmID);
+        
         HttpSession session = getRequest().getSession(true); 
         session.setAttribute(SESSION_ATTR_EVENT_LOG, eventLog);
-        //session.setAttribute(SESSION_ATTR_CRAWLER, crawler);
+        session.setAttribute(SESSION_ATTR_CRAWLER, crawler);
+        session.setAttribute(SESSION_ATTR_REALM_ID, realm.getID());
+        session.setAttribute(SESSION_ATTR_REALM_NAME, realm.getName());
         
-        // create dump:
-        // TODO: start crawler in thread and show progress
-        crawler.run();
-        crawler.close();
-        
-        // import dump into realm:
-        Realm realm = getYanel().getRealmConfiguration().getRealm(realmID);
-        deleteRepositoryContent(realm.getRepository());
-        deleteRepositoryContent(realm.getRTIRepository());
-        Node root = realm.getRepository().getRootNode();
-        importContent(new File(dumpDir), root);
-        
-        // remove temp dump dir
-        FileUtils.deleteDirectory(new File(dumpDir));
-        
-        fixRootNode(crawlStartURL, root);
-        addResourceConfiguration(realm.getRTIRepository());
+        // start crawler in new thread to be able to show progress:
+        ImportSiteThread thread = new ImportSiteThread(crawler, realm, dumpDir, crawlStartURL);
+        thread.start();
     }
-    
-    /**
-     * Imports the content of the given directory into the repository as child nodes
-     * of the given node. This will recursively add the complete subtree.
-     * If a Node already exists in the repository, it will be overwritten.
-     * @param dir
-     * @param node
-     * @throws IOException
-     * @throws RepositoryException
-     */
-    protected void importContent(File dir, Node node) throws IOException, RepositoryException{
-        File[] children = dir.listFiles();
-        for (int i=0; i<children.length; i++) {
-            File file = children[i];
-            String name = file.getName();
-            Node childNode;
-            if (file.isDirectory()) {
-                if (node.hasNode(name)) {
-                    childNode = node.getNode(name);
-                } else {
-                    childNode = node.addNode(name, NodeType.COLLECTION);
-                }
-                // recursion:
-                importContent(file, childNode);
-            } else {
-                if (node.hasNode(name)) {
-                    childNode = node.getNode(name);
-                } else {
-                    childNode = node.addNode(name, NodeType.RESOURCE);
-                }
-                String mimeType = guessMimeType(FilenameUtils.getExtension(file.getName()));
-                InputStream is = new FileInputStream(file);
-                OutputStream os = childNode.getOutputStream();
-                if (mimeType.equals("text/html")) {
-                    addIntrospectionLink(is, os);
-                } else {
-                    byte[] buf = new byte[8192];
-                    int bytesRead;
-                    while ((bytesRead = is.read(buf)) != -1) {
-                        os.write(buf, 0, bytesRead);
-                    }
-                }
-                os.flush();
-                os.close();
-                is.close();
-                childNode.setMimeType(mimeType);
-            }
-        }
-    }
-    
-    /**
-     * Adds a yanel introspection link element to the head element of the current page.
-     * Note: this method is stream based and does not consider character encoding, therefore
-     * it works only for data with ascii-compatible encoding like utf-8 or iso-8859-1. 
-     * @param is stream of the source html page
-     * @param os stream of the result html page
-     * @throws IOException
-     */
-    protected void addIntrospectionLink(InputStream is, OutputStream os) throws IOException {
-        int b;
-        int state = OUTSIDE_TAG;
-        StringBuffer tagNameBuf = null;
-        while ((b = is.read()) != -1) {
-            switch (state) {
-            case OUTSIDE_TAG:
-                if (b == '<') {
-                    tagNameBuf = new StringBuffer();
-                    state = INSIDE_TAG;
-                }
-                os.write(b);
-                break;
-            case INSIDE_TAG:
-                os.write(b);
-                if (b == '>') {
-                    state = OUTSIDE_TAG;
-                    String tagName = tagNameBuf.toString();
-                    if (tagName.startsWith("head")) {
-                        String introspectionLink = "<link rel=\"neutron-introspection\" type=\"application/neutron+xml\" href=\"?yanel.resource.usecase=introspection\"/>";
-                        os.write(introspectionLink.getBytes());
-                    }
-                } else {
-                    tagNameBuf.append((char)b);
-                }
-                break;
-            }
-        }
-    }
-    
-    /**
-     * Creates a redirect from the repository root node to the crawl root page.
-     * This is necessary to make the root page of the crawl 
-     * accessible at root url of the new realm.
-     * Example: 
-     * crawlStartURL: http://foo.bar/start.html
-     * new realm id:  foo-realm
-     * -> /foo-realm/ will redirect to /foo-realm/start.html 
-     * @param crawlStartURL
-     * @param root
-     * @throws RepositoryException 
-     */
-    protected void fixRootNode(String crawlStartURL, Node root) {
-        try {
-            URL url = new URL(crawlStartURL);
-            String path = url.getPath();
-            String crawlRoot = null;
-            if (path.length() == 0 || path.endsWith("/")) {
-                crawlRoot = "index.html";
-            } else if (path.indexOf("/") > -1) {
-                crawlRoot = path.substring(path.lastIndexOf("/") + 1);
-            }
-            log.debug("crawlRoot: " + crawlRoot);
-            if (crawlRoot != null && root.hasNode(crawlRoot)) {
-                PrintWriter writer = new PrintWriter(new OutputStreamWriter(root.getOutputStream()));
-                writer.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">");
-                writer.println("<html>");
-                writer.println("<head>");
-                writer.println("<meta http-equiv=\"refresh\" content=\"0; url=" + crawlRoot + "\"/>");
-                writer.println("</head>");
-                writer.println("<body/>");
-                writer.println("</html>");
-                writer.flush();
-                writer.close();
-            }
-        } catch (Exception e) {
-            log.error(e, e);
-            // ignore 
-        }
-    }
-    
-    /**
-     * Adds a resource configuration file for the root node of the repository.
-     * @param repository
-     * @throws RepositoryException
-     */
-    protected void addResourceConfiguration(Repository repository) throws RepositoryException {
-        Node node = repository.getRootNode().addNode(".yanel-rc", NodeType.RESOURCE);
-        PrintWriter writer = new PrintWriter(new OutputStreamWriter(node.getOutputStream()));
-        writer.println("<?xml version=\"1.0\"?>");
-        writer.println("<yanel:resource-config xmlns:yanel=\"http://www.wyona.org/yanel/rti/1.0\">");
-        writer.println("<yanel:rti name=\"file\" namespace=\"http://www.wyona.org/yanel/resource/1.0\"/>");
-        writer.println("<yanel:property name=\"mime-type\" value=\"text/html\"/>");
-        writer.println("</yanel:resource-config>");
-        writer.flush();
-        writer.close();
-    }
-    
-    /**
-     * Delete all nodes from the repository except the root node.
-     * @param repository
-     * @throws RepositoryException
-     */
-    protected void deleteRepositoryContent(Repository repository) throws RepositoryException {
-        Node[] children = repository.getRootNode().getNodes();
-        for (int i=0; i<children.length; i++) {
-            children[i].delete();
-        }
-    }
-    
-    /**
-     * Returns the mime-type according to the given file extension.
-     * Default is application/octet-stream.
-     * @param extension
-     * @return
-     */
-    protected String guessMimeType(String extension) {
-        String ext = extension.toLowerCase();
-        if (ext.equals("html") || ext.equals("htm")) return "text/html";
-        if (ext.equals("css")) return "text/css";
-        if (ext.equals("txt")) return "text/plain";
-        if (ext.equals("js")) return "application/x-javascript";
-        if (ext.equals("jpg") || ext.equals("jpg")) return "image/jpeg";
-        if (ext.equals("gif")) return "image/gif";
-        if (ext.equals("pdf")) return "application/pdf";
-        if (ext.equals("zip")) return "application/zip";
-        //TODO: add more
-        return "application/octet-stream"; // default
-    }
-    
-    /* TODO: add showProgress
-    private View showProgress(Path path, View defaultView) throws Exception {
         
-        //get tmpResultDir from session
-        tmpResultDir = (File) request.getSession().getAttribute("tmpResultDir");
-        
-        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
-
-        Transformer transformer = null;
-        transformer = TransformerFactory.newInstance().newTransformer(new StreamSource(inputXSLTFile));
-        // TODO: Is this the best way to generate an InputStream from an
-        // OutputStream?
-        java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream();
-        transformer.transform(new StreamSource(new ByteArrayInputStream(byteArrayOutputStream.toByteArray())),
-                new StreamResult(baos));
-        defaultView.setMimeType(getMimeType(viewId));
-        defaultView.setInputStream(new java.io.ByteArrayInputStream(baos.toByteArray()));
-        return defaultView;
-    }*/
-   
     /**
      * 
      */

Modified: public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/EventLog.java
===================================================================
--- public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/EventLog.java	2007-03-15 11:01:59 UTC (rev 23271)
+++ public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/EventLog.java	2007-03-15 11:02:26 UTC (rev 23272)
@@ -7,20 +7,24 @@
 import java.io.Serializable;
 import java.util.ArrayList;
 
+import websphinx.CrawlEvent;
+import websphinx.CrawlListener;
 import websphinx.LinkEvent;
 import websphinx.LinkListener;
 
 /**
  * 
  */
-public class EventLog implements LinkListener, Serializable {
+public class EventLog implements LinkListener, CrawlListener, Serializable {
 
-    ArrayList downloadEvents;
-    ArrayList errorEvents;
+    private ArrayList downloadEvents;
+    private ArrayList errorEvents;
+    private boolean isDone;
     
     public EventLog() {
         this.downloadEvents = new ArrayList();
         this.errorEvents = new ArrayList();
+        this.isDone = false;
     }
     
     /**
@@ -65,4 +69,24 @@
     public int getNofDownloads() {
         return this.downloadEvents.size();
     }
+
+    public void cleared(CrawlEvent event) {
+    }
+
+    public void paused(CrawlEvent event) {
+    }
+
+    public void started(CrawlEvent event) {
+    }
+
+    public void stopped(CrawlEvent event) {
+        this.isDone = true;
+    }
+
+    public void timedOut(CrawlEvent event) {
+    }
+    
+    public boolean isDone() {
+        return this.isDone;
+    }
 }

Modified: public/yanel/trunk/src/resources/add-realm/xslt/add-realm.xsl
===================================================================
--- public/yanel/trunk/src/resources/add-realm/xslt/add-realm.xsl	2007-03-15 11:01:59 UTC (rev 23271)
+++ public/yanel/trunk/src/resources/add-realm/xslt/add-realm.xsl	2007-03-15 11:02:26 UTC (rev 23272)
@@ -20,6 +20,7 @@
   <xsl:param name="downloadevents" select="''" />
   <xsl:param name="errorevents" select="''" />
   <xsl:param name="nofdownloads" select="''" />
+  <xsl:param name="isdone" select="''" />
 
   <xsl:param name="submitted" select="'false'" />
 
@@ -32,6 +33,9 @@
               padding-bottom:10px;
             }
         </style>
+        <xsl:if test="$isdone = 'false'">
+          <meta http-equiv="refresh" content="2"/>
+        </xsl:if>
       </head>
       
       <body>
@@ -40,19 +44,30 @@
         <div id="contentBody">
           <xsl:choose>
             <xsl:when test="$submitted != 'false'">
+              <xsl:choose>
+                <xsl:when test="$isdone = 'true'">
+                  <h2>Import completed</h2>
+                  <p>
+                    <a>
+                      <xsl:attribute name="href">
+                        <xsl:value-of select="$yanel.back2context"/><xsl:value-of select="$realmid"/>/
+                      </xsl:attribute>View <xsl:value-of select="$realmname" />
+                    </a>
+                  </p>
+                </xsl:when>
+                <xsl:otherwise>
+                  <h2>Import running, please wait...</h2>
+                  <form>
+                    <input type="submit" value="Stop Crawl" name="stop"/>
+                  </form>
+                </xsl:otherwise>
+              </xsl:choose>
+              
               <p>[<xsl:value-of select="$nofdownloads"/>] pages have been imported.</p>
-              <p>[X]% complete.</p>
               <p>Downloaded Pages:</p>
               <p style="font-size: small"><pre><xsl:value-of select="$downloadevents"/></pre></p>
               <p>Errors:</p>
               <p style="font-size: small"><pre><xsl:value-of select="$errorevents"/></pre></p>
-              <p>
-                <a>
-                  <xsl:attribute name="href">
-                    <xsl:value-of select="$yanel.back2context"/><xsl:value-of select="$realmid"/>/
-                  </xsl:attribute>View <xsl:value-of select="$realmname" />
-                </a>
-              </p>
             </xsl:when>
             <xsl:otherwise>
               




More information about the Yanel-commits mailing list