[Yanel-commits] rev 23272 - in
public/yanel/trunk/src/resources/add-realm:
src/java/org/wyona/yanel/impl/resources xslt
michi at wyona.com
michi at wyona.com
Thu Mar 15 12:02:28 CET 2007
Author: michi
Date: 2007-03-15 12:02:26 +0100 (Thu, 15 Mar 2007)
New Revision: 23272
Modified:
public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource.java
public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/EventLog.java
public/yanel/trunk/src/resources/add-realm/xslt/add-realm.xsl
Log:
refresh screen added
Modified: public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource.java
===================================================================
--- public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource.java 2007-03-15 11:01:59 UTC (rev 23271)
+++ public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/AddRealmResource.java 2007-03-15 11:02:26 UTC (rev 23272)
@@ -61,10 +61,10 @@
private static final String CRAWLER_JAR = "yanel-crawler.jar";
private static Category log = Category.getInstance(AddRealmResource.class);
- private final static int INSIDE_TAG = 0;
- private final static int OUTSIDE_TAG = 1;
private final static String SESSION_ATTR_EVENT_LOG = "org.wyona.yanel.addrealm.eventlog";
private final static String SESSION_ATTR_CRAWLER = "org.wyona.yanel.addrealm.crawler";
+ private final static String SESSION_ATTR_REALM_ID = "org.wyona.yanel.addrealm.realm.id";
+ private final static String SESSION_ATTR_REALM_NAME = "org.wyona.yanel.addrealm.realm.name";
private String defaultLanguage = "en";
private String language = null;
@@ -137,10 +137,12 @@
// Check if data was submitted (realm ID, realm Name, URL to be dumped, depth of crawling, max number of pages)
boolean submit = false;
+ boolean stop = false;
Enumeration enumeration = request.getParameterNames();
while(enumeration.hasMoreElements()){
- if(enumeration.nextElement().toString().equals("submit"))
- submit = true;
+ String param = enumeration.nextElement().toString();
+ if (param.equals("submit")) submit = true;
+ if (param.equals("stop")) stop = true;
}
File XSLTFile = org.wyona.commons.io.FileUtil.file(rtd.getConfigFile().getParentFile().getAbsolutePath(), "xslt" + File.separator + "add-realm.xsl");
@@ -168,6 +170,8 @@
Set keys = parameters.keySet();
Iterator keysIterator = keys.iterator();
+ HttpSession session = getRequest().getSession(true);
+
if(submit) {
while (keysIterator.hasNext()) {
@@ -212,12 +216,12 @@
importSite(crawlStartURL, maxPages, maxDepth, realmID);
- HttpSession session = getRequest().getSession(true);
EventLog eventLog = (EventLog)session.getAttribute(SESSION_ATTR_EVENT_LOG);
if (eventLog != null) {
transformer.setParameter("downloadevents", eventLog.getDownloadEvents());
transformer.setParameter("errorevents", eventLog.getErrorEvents());
transformer.setParameter("nofdownloads", String.valueOf(eventLog.getNofDownloads()));
+ transformer.setParameter("isdone", String.valueOf(eventLog.isDone()));
}
}
@@ -225,6 +229,38 @@
transformer.transform(new javax.xml.transform.stream.StreamSource(statusXMLFile), new StreamResult(byteArrayOutputStream));
+ } else if (session.getAttribute(SESSION_ATTR_EVENT_LOG) != null) {
+ // the crawler is running
+
+ if (stop) {
+ DumpingCrawler crawler = (DumpingCrawler)session.getAttribute(SESSION_ATTR_CRAWLER);
+ if (crawler != null) {
+ crawler.stop();
+ }
+ }
+
+ // show progress
+ EventLog eventLog = (EventLog)session.getAttribute(SESSION_ATTR_EVENT_LOG);
+ if (eventLog != null) {
+ transformer.setParameter("downloadevents", eventLog.getDownloadEvents());
+ transformer.setParameter("errorevents", eventLog.getErrorEvents());
+ transformer.setParameter("nofdownloads", String.valueOf(eventLog.getNofDownloads()));
+ transformer.setParameter("submitted", "true");
+ transformer.setParameter("isdone", String.valueOf(eventLog.isDone()));
+
+ transformer.setParameter("yanel.back2context", PathUtil.backToContext(realm, getPath()));
+ transformer.setParameter("realmid", session.getAttribute(SESSION_ATTR_REALM_ID));
+ transformer.setParameter("realmname", session.getAttribute(SESSION_ATTR_REALM_NAME));
+
+ if (eventLog.isDone()) {
+ session.removeAttribute(SESSION_ATTR_EVENT_LOG);
+ session.removeAttribute(SESSION_ATTR_CRAWLER);
+ session.removeAttribute(SESSION_ATTR_REALM_ID);
+ session.removeAttribute(SESSION_ATTR_REALM_NAME);
+ }
+ }
+ transformer.transform(new javax.xml.transform.stream.StreamSource(statusXMLFile), new StreamResult(byteArrayOutputStream));
+
} else {
while (keysIterator.hasNext()) {
@@ -272,227 +308,21 @@
EventLog eventLog = new EventLog();
crawler.addLinkListener(eventLog);
+ crawler.addCrawlListener(eventLog);
+ Realm realm = getYanel().getRealmConfiguration().getRealm(realmID);
+
HttpSession session = getRequest().getSession(true);
session.setAttribute(SESSION_ATTR_EVENT_LOG, eventLog);
- //session.setAttribute(SESSION_ATTR_CRAWLER, crawler);
+ session.setAttribute(SESSION_ATTR_CRAWLER, crawler);
+ session.setAttribute(SESSION_ATTR_REALM_ID, realm.getID());
+ session.setAttribute(SESSION_ATTR_REALM_NAME, realm.getName());
- // create dump:
- // TODO: start crawler in thread and show progress
- crawler.run();
- crawler.close();
-
- // import dump into realm:
- Realm realm = getYanel().getRealmConfiguration().getRealm(realmID);
- deleteRepositoryContent(realm.getRepository());
- deleteRepositoryContent(realm.getRTIRepository());
- Node root = realm.getRepository().getRootNode();
- importContent(new File(dumpDir), root);
-
- // remove temp dump dir
- FileUtils.deleteDirectory(new File(dumpDir));
-
- fixRootNode(crawlStartURL, root);
- addResourceConfiguration(realm.getRTIRepository());
+ // start crawler in new thread to be able to show progress:
+ ImportSiteThread thread = new ImportSiteThread(crawler, realm, dumpDir, crawlStartURL);
+ thread.start();
}
-
- /**
- * Imports the content of the given directory into the repository as child nodes
- * of the given node. This will recursively add the complete subtree.
- * If a Node already exists in the repository, it will be overwritten.
- * @param dir
- * @param node
- * @throws IOException
- * @throws RepositoryException
- */
- protected void importContent(File dir, Node node) throws IOException, RepositoryException{
- File[] children = dir.listFiles();
- for (int i=0; i<children.length; i++) {
- File file = children[i];
- String name = file.getName();
- Node childNode;
- if (file.isDirectory()) {
- if (node.hasNode(name)) {
- childNode = node.getNode(name);
- } else {
- childNode = node.addNode(name, NodeType.COLLECTION);
- }
- // recursion:
- importContent(file, childNode);
- } else {
- if (node.hasNode(name)) {
- childNode = node.getNode(name);
- } else {
- childNode = node.addNode(name, NodeType.RESOURCE);
- }
- String mimeType = guessMimeType(FilenameUtils.getExtension(file.getName()));
- InputStream is = new FileInputStream(file);
- OutputStream os = childNode.getOutputStream();
- if (mimeType.equals("text/html")) {
- addIntrospectionLink(is, os);
- } else {
- byte[] buf = new byte[8192];
- int bytesRead;
- while ((bytesRead = is.read(buf)) != -1) {
- os.write(buf, 0, bytesRead);
- }
- }
- os.flush();
- os.close();
- is.close();
- childNode.setMimeType(mimeType);
- }
- }
- }
-
- /**
- * Adds a yanel introspection link element to the head element of the current page.
- * Note: this method is stream based and does not consider character encoding, therefore
- * it works only for data with ascii-compatible encoding like utf-8 or iso-8859-1.
- * @param is stream of the source html page
- * @param os stream of the result html page
- * @throws IOException
- */
- protected void addIntrospectionLink(InputStream is, OutputStream os) throws IOException {
- int b;
- int state = OUTSIDE_TAG;
- StringBuffer tagNameBuf = null;
- while ((b = is.read()) != -1) {
- switch (state) {
- case OUTSIDE_TAG:
- if (b == '<') {
- tagNameBuf = new StringBuffer();
- state = INSIDE_TAG;
- }
- os.write(b);
- break;
- case INSIDE_TAG:
- os.write(b);
- if (b == '>') {
- state = OUTSIDE_TAG;
- String tagName = tagNameBuf.toString();
- if (tagName.startsWith("head")) {
- String introspectionLink = "<link rel=\"neutron-introspection\" type=\"application/neutron+xml\" href=\"?yanel.resource.usecase=introspection\"/>";
- os.write(introspectionLink.getBytes());
- }
- } else {
- tagNameBuf.append((char)b);
- }
- break;
- }
- }
- }
-
- /**
- * Creates a redirect from the repository root node to the crawl root page.
- * This is necessary to make the root page of the crawl
- * accessible at root url of the new realm.
- * Example:
- * crawlStartURL: http://foo.bar/start.html
- * new realm id: foo-realm
- * -> /foo-realm/ will redirect to /foo-realm/start.html
- * @param crawlStartURL
- * @param root
- * @throws RepositoryException
- */
- protected void fixRootNode(String crawlStartURL, Node root) {
- try {
- URL url = new URL(crawlStartURL);
- String path = url.getPath();
- String crawlRoot = null;
- if (path.length() == 0 || path.endsWith("/")) {
- crawlRoot = "index.html";
- } else if (path.indexOf("/") > -1) {
- crawlRoot = path.substring(path.lastIndexOf("/") + 1);
- }
- log.debug("crawlRoot: " + crawlRoot);
- if (crawlRoot != null && root.hasNode(crawlRoot)) {
- PrintWriter writer = new PrintWriter(new OutputStreamWriter(root.getOutputStream()));
- writer.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">");
- writer.println("<html>");
- writer.println("<head>");
- writer.println("<meta http-equiv=\"refresh\" content=\"0; url=" + crawlRoot + "\"/>");
- writer.println("</head>");
- writer.println("<body/>");
- writer.println("</html>");
- writer.flush();
- writer.close();
- }
- } catch (Exception e) {
- log.error(e, e);
- // ignore
- }
- }
-
- /**
- * Adds a resource configuration file for the root node of the repository.
- * @param repository
- * @throws RepositoryException
- */
- protected void addResourceConfiguration(Repository repository) throws RepositoryException {
- Node node = repository.getRootNode().addNode(".yanel-rc", NodeType.RESOURCE);
- PrintWriter writer = new PrintWriter(new OutputStreamWriter(node.getOutputStream()));
- writer.println("<?xml version=\"1.0\"?>");
- writer.println("<yanel:resource-config xmlns:yanel=\"http://www.wyona.org/yanel/rti/1.0\">");
- writer.println("<yanel:rti name=\"file\" namespace=\"http://www.wyona.org/yanel/resource/1.0\"/>");
- writer.println("<yanel:property name=\"mime-type\" value=\"text/html\"/>");
- writer.println("</yanel:resource-config>");
- writer.flush();
- writer.close();
- }
-
- /**
- * Delete all nodes from the repository except the root node.
- * @param repository
- * @throws RepositoryException
- */
- protected void deleteRepositoryContent(Repository repository) throws RepositoryException {
- Node[] children = repository.getRootNode().getNodes();
- for (int i=0; i<children.length; i++) {
- children[i].delete();
- }
- }
-
- /**
- * Returns the mime-type according to the given file extension.
- * Default is application/octet-stream.
- * @param extension
- * @return
- */
- protected String guessMimeType(String extension) {
- String ext = extension.toLowerCase();
- if (ext.equals("html") || ext.equals("htm")) return "text/html";
- if (ext.equals("css")) return "text/css";
- if (ext.equals("txt")) return "text/plain";
- if (ext.equals("js")) return "application/x-javascript";
- if (ext.equals("jpg") || ext.equals("jpg")) return "image/jpeg";
- if (ext.equals("gif")) return "image/gif";
- if (ext.equals("pdf")) return "application/pdf";
- if (ext.equals("zip")) return "application/zip";
- //TODO: add more
- return "application/octet-stream"; // default
- }
-
- /* TODO: add showProgress
- private View showProgress(Path path, View defaultView) throws Exception {
- //get tmpResultDir from session
- tmpResultDir = (File) request.getSession().getAttribute("tmpResultDir");
-
- ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
-
- Transformer transformer = null;
- transformer = TransformerFactory.newInstance().newTransformer(new StreamSource(inputXSLTFile));
- // TODO: Is this the best way to generate an InputStream from an
- // OutputStream?
- java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream();
- transformer.transform(new StreamSource(new ByteArrayInputStream(byteArrayOutputStream.toByteArray())),
- new StreamResult(baos));
- defaultView.setMimeType(getMimeType(viewId));
- defaultView.setInputStream(new java.io.ByteArrayInputStream(baos.toByteArray()));
- return defaultView;
- }*/
-
/**
*
*/
Modified: public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/EventLog.java
===================================================================
--- public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/EventLog.java 2007-03-15 11:01:59 UTC (rev 23271)
+++ public/yanel/trunk/src/resources/add-realm/src/java/org/wyona/yanel/impl/resources/EventLog.java 2007-03-15 11:02:26 UTC (rev 23272)
@@ -7,20 +7,24 @@
import java.io.Serializable;
import java.util.ArrayList;
+import websphinx.CrawlEvent;
+import websphinx.CrawlListener;
import websphinx.LinkEvent;
import websphinx.LinkListener;
/**
*
*/
-public class EventLog implements LinkListener, Serializable {
+public class EventLog implements LinkListener, CrawlListener, Serializable {
- ArrayList downloadEvents;
- ArrayList errorEvents;
+ private ArrayList downloadEvents;
+ private ArrayList errorEvents;
+ private boolean isDone;
public EventLog() {
this.downloadEvents = new ArrayList();
this.errorEvents = new ArrayList();
+ this.isDone = false;
}
/**
@@ -65,4 +69,24 @@
public int getNofDownloads() {
return this.downloadEvents.size();
}
+
+ public void cleared(CrawlEvent event) {
+ }
+
+ public void paused(CrawlEvent event) {
+ }
+
+ public void started(CrawlEvent event) {
+ }
+
+ public void stopped(CrawlEvent event) {
+ this.isDone = true;
+ }
+
+ public void timedOut(CrawlEvent event) {
+ }
+
+ public boolean isDone() {
+ return this.isDone;
+ }
}
Modified: public/yanel/trunk/src/resources/add-realm/xslt/add-realm.xsl
===================================================================
--- public/yanel/trunk/src/resources/add-realm/xslt/add-realm.xsl 2007-03-15 11:01:59 UTC (rev 23271)
+++ public/yanel/trunk/src/resources/add-realm/xslt/add-realm.xsl 2007-03-15 11:02:26 UTC (rev 23272)
@@ -20,6 +20,7 @@
<xsl:param name="downloadevents" select="''" />
<xsl:param name="errorevents" select="''" />
<xsl:param name="nofdownloads" select="''" />
+ <xsl:param name="isdone" select="''" />
<xsl:param name="submitted" select="'false'" />
@@ -32,6 +33,9 @@
padding-bottom:10px;
}
</style>
+ <xsl:if test="$isdone = 'false'">
+ <meta http-equiv="refresh" content="2"/>
+ </xsl:if>
</head>
<body>
@@ -40,19 +44,30 @@
<div id="contentBody">
<xsl:choose>
<xsl:when test="$submitted != 'false'">
+ <xsl:choose>
+ <xsl:when test="$isdone = 'true'">
+ <h2>Import completed</h2>
+ <p>
+ <a>
+ <xsl:attribute name="href">
+ <xsl:value-of select="$yanel.back2context"/><xsl:value-of select="$realmid"/>/
+ </xsl:attribute>View <xsl:value-of select="$realmname" />
+ </a>
+ </p>
+ </xsl:when>
+ <xsl:otherwise>
+ <h2>Import running, please wait...</h2>
+ <form>
+ <input type="submit" value="Stop Crawl" name="stop"/>
+ </form>
+ </xsl:otherwise>
+ </xsl:choose>
+
<p>[<xsl:value-of select="$nofdownloads"/>] pages have been imported.</p>
- <p>[X]% complete.</p>
<p>Downloaded Pages:</p>
<p style="font-size: small"><pre><xsl:value-of select="$downloadevents"/></pre></p>
<p>Errors:</p>
<p style="font-size: small"><pre><xsl:value-of select="$errorevents"/></pre></p>
- <p>
- <a>
- <xsl:attribute name="href">
- <xsl:value-of select="$yanel.back2context"/><xsl:value-of select="$realmid"/>/
- </xsl:attribute>View <xsl:value-of select="$realmname" />
- </a>
- </p>
</xsl:when>
<xsl:otherwise>
More information about the Yanel-commits
mailing list