[Yanel-dev] Important Yarep Patch for split path

Balz Schreier balz.schreier at gmail.com
Fri Apr 8 15:57:00 CEST 2011


Hi Michael,

I guess my patch from Nov 25th got lost somehow, because the current
Yarep implementation (org.wyona.yarep.impl.repo.vfs.*) is still the old one,
which does not work when split paths are used!

So I now built the patch again on the latest yarep files in SVN.

Quick overview:
- The split path configuration is no longer spread over several classes; it
has been centralized in the new class SplitPathConfig.java (this class is also
part of the patch, I added it before I did the svn diff)
- The method *Repository.splitPath() has been moved to the SplitPathConfig
class (as the static method splitPathIfRequired()) and has been fixed
- In order to allow customizations in the *Repository and *Node classes, some
methods had to be made public (instead of package private).

Can you please try it out? This is quite critical for me at the moment
because I want to get rid of Yanel class copies in my realm now (and use
proper inheritance instead).

Please let me know whether the patch looks good to you.
Cheers
Balz
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.wyona.org/pipermail/yanel-development/attachments/20110408/7c6c8e30/attachment-0001.html>
-------------- next part --------------
Index: src/impl/java/org/wyona/yarep/impl/repo/vfs/VirtualFileSystemNode.java
===================================================================
--- src/impl/java/org/wyona/yarep/impl/repo/vfs/VirtualFileSystemNode.java	(revision 57761)
+++ src/impl/java/org/wyona/yarep/impl/repo/vfs/VirtualFileSystemNode.java	(working copy)
@@ -99,10 +99,10 @@
     protected void init() throws RepositoryException {
         
         this.contentDir = getRepository().getContentDir();
-        this.contentFile = new File(this.contentDir, getRepository().splitPath(this.uuid));
+        this.contentFile = new File(this.contentDir, SplitPathConfig.splitPathIfRequired(this.uuid, getRepository().getSplitPathConfig()));
         this.backupContentFile = new File(this.contentDir, this.uuid);
         
-        String metauuid = getRepository().splitPath(uuid + META_DIR_SUFFIX);
+        String metauuid = SplitPathConfig.splitPathIfRequired(uuid + META_DIR_SUFFIX, getRepository().getSplitPathConfig());
         if (getRepository().getMetaDir() != null) {
             this.metaDir = new File(getRepository().getMetaDir(), metauuid);
             this.backupMetaDir = new File(getRepository().getMetaDir(), uuid + META_DIR_SUFFIX);
@@ -326,7 +326,7 @@
         if (this.repository.existsNode(newPath)) {
             throw new RepositoryException("Node exists already: " + newPath);
         }
-        UID uid = getRepository().getMap().create(new Path(getRepository().splitPath(newPath)), type);
+        UID uid = getRepository().getMap().create(new Path(SplitPathConfig.splitPathIfRequired(newPath, getRepository().getSplitPathConfig())), type);
         // create file:
         File file = new File(this.contentDir, uid.toString());
         try {
Index: src/impl/java/org/wyona/yarep/impl/repo/vfs/VirtualFileSystemRepository.java
===================================================================
--- src/impl/java/org/wyona/yarep/impl/repo/vfs/VirtualFileSystemRepository.java	(revision 57761)
+++ src/impl/java/org/wyona/yarep/impl/repo/vfs/VirtualFileSystemRepository.java	(working copy)
@@ -115,11 +115,7 @@
 
     // Configuration parameters of the <splitpath ...> element
     private boolean splitPathEnabled = false;
-    private int splitparts = 0;
-    private int splitlength = 0;
-    private String DEFAULT_DUMMY_SEPARATOR_VALUE = "-";
-    private String dummySeparator = DEFAULT_DUMMY_SEPARATOR_VALUE;
-    private String[] includepaths = {};
+    private SplitPathConfig splitPathConfig = new SplitPathConfig();
     
     /**
      *
@@ -213,23 +209,22 @@
             Configuration splitConfig = config.getChild("splitpath", false);
             if (splitConfig != null) {
                 splitPathEnabled = true;
-                String depth = splitConfig.getAttribute("depth", "0");
-                splitparts = Integer.parseInt(depth);
+                splitPathConfig.setSplitparts(Integer.parseInt(splitConfig.getAttribute("depth", "0")));
+                splitPathConfig.setSplitlength(Integer.parseInt(splitConfig.getAttribute("length", "0")));
 
-                String length;
-                length = splitConfig.getAttribute("length", "0");
-                splitlength = Integer.parseInt(length);
-
-                dummySeparator = splitConfig.getAttribute("escape", DEFAULT_DUMMY_SEPARATOR_VALUE);
-
-                int c = splitConfig.getChildren("include").length;
-                int i = 0;
-                if (c > 0) {
-                    includepaths = new String[c];
+                int numberOfIncludePaths = splitConfig.getChildren("include").length;
+                if (numberOfIncludePaths > 0) {
+                    String[] includepaths = new String[numberOfIncludePaths];
+                    int i = 0;
                     for (Configuration include : splitConfig.getChildren("include")) {
                         includepaths[i++] = include.getAttribute("path");
                     }
+                    splitPathConfig.setIncludepaths(includepaths);
                 }
+                // NOTE: This repository uses the VFileSystemMapImpl: But we do not tell the map about the splitting.
+                //       Splitting is known to this repository and the node only.
+                // ((org.wyona.yarep.impl.VFileSystemMapImpl) map).setSplitPathConfig(splitPathConfig);
+                // ((org.wyona.yarep.impl.VFileSystemMapImpl) map).setSplitPathEnabled(true);
             } 
         } catch (Exception e) {
             log.error(e.toString());
@@ -296,7 +291,6 @@
      * @see org.wyona.yarep.core.Repository#exists(org.wyona.yarep.core.Path)
      */
     public boolean exists(Path path) throws RepositoryException {
-        log.warn("DEPRECATED");
         return existsNode(path.toString());
     }
 
@@ -401,7 +395,8 @@
             path = path.substring(0, path.length() - 1);
         }
         if (splitPathEnabled) {
-            return map.exists(new Path(splitPath(path))) || map.exists(new Path(path)); // INFO: The OR is because of backwards compatibility in case that a node exists with an unsplitted path, because it has not been migrated yet (which can happen if it has only been read so far, but never written since introducing the split path configuration)
+            String maybeSplit = SplitPathConfig.splitPathIfRequired(path, splitPathConfig);
+            return map.exists(new Path(maybeSplit)) || map.exists(new Path(path));
         } else {
             return map.exists(new Path(path));
         }
@@ -416,7 +411,8 @@
             path = path.substring(0, path.length() - 1);
         }
 
-        if ((splitPathEnabled && map.exists(new Path(splitPath(path)))) || map.exists(new Path(path))) {
+        String maybeSplit = SplitPathConfig.splitPathIfRequired(path, splitPathConfig);
+        if ((splitPathEnabled && map.exists(new Path(maybeSplit))) || map.exists(new Path(path))) {
             String uuid = new UID(path).toString();
             return new VirtualFileSystemNode(this, path, uuid);
         } else {
@@ -609,98 +605,20 @@
         return true;
     }
 
-    /**
-     * Splits a String such that the result can be used as a repo path for a tree-like repo structure.
-     *
-     * This method splits off n strings (where n = parts) of length partlength, e.g. if
-     * splitPath("ec2c0c02-1d7d-4a21-8a39-68f9f72dea09", 3, 4) is called, then:
-     * in:  ec2c0c02-1d7d-4a21-8a39-68f9f72dea09, 3, 4
-     * out: ec2c/0c02/-1d7/d-4a21-8a39-68f9f72dea09
-     *
-     * If the strings length is shorter than parts * partslength, then as many
-     * parts as possible are split, e.g.
-     * in:  foobar, 2, 5
-     * out: fooba/r
-     * in:  lorem, 3, 10
-     * out: lorem
-     *
-     * An example with "/" characters:
-     * in:  /foobar/lorem/ipsum.txt, parts = 3, lenght = 3
-     * out: /foo/bar/-lo/rem/ipsum.txt
-     *
-     * @param uuid
-     * @return split uuid
-     */
-    String splitPath(String path) {
-        // NOTE: uuid should be a full yarep path, so we can safely remove
-        // the leading slash
-        
-        // check if the given path matches any of the include values
-        // in the configuration
-        boolean include = false;
-        String base = "";
-        for (String s : includepaths) {
-            if (path.startsWith(s)) {
-                include = true;
-                base = s;
-                break;
-            }
-        }
+    public SplitPathConfig getSplitPathConfig() {
+        return splitPathConfig;
+    }
 
-        // return the path unchanged if it doesn't match
-        // any of the include values
-        if (!include) {
-            return path;
-        }
-        
-        // remove the leading base string, will be added again later
-        path = path.substring(base.length(), path.length());
-
-        // replace "/" characters where needed
-        if (path.length() <= splitparts * splitlength) {
-            path = path.replaceAll("/", dummySeparator);
-        } else {
-            path = String.format("%s%s",
-                    path.substring(0, splitparts * splitlength).replaceAll("/", dummySeparator),
-                    path.substring(splitparts * splitlength));
-        }
-
-        // now do the actual splitting
-        int len = path.length();
-        int pos = 0;
-        String out = "";
-
-        int partc = 0;
-        int w;
-        while (len > 0 && partc < splitparts) {
-            partc++;
-            if (len < splitlength) {
-                w = len;
-            } else {
-                w = splitlength;
-            }
-            out += path.substring(pos, pos + w);
-            pos += w;
-            len -= w;
-
-            if (len > 0) {
-                out += "/";
-            }
-        }
-
-        // append remainder
-        if (len > 0) {
-            out += path.substring(pos, pos + len);
-        }
-
-        // finally, add the leading zero again and return the new path
-        return base + out;
+    public void setSplitPathConfig(SplitPathConfig splitPathConfig) {
+        this.splitPathConfig = splitPathConfig;
     }
 
-    /**
-     * Check whether split path is enabled and make this available to classes within this package
-     */
-    boolean isSplitPathEnabled() {
+    public boolean isSplitPathEnabled() {
         return splitPathEnabled;
     }
+
+    public void setSplitPathEnabled(boolean splitPathEnabled) {
+        this.splitPathEnabled = splitPathEnabled;
+    }
+
 }
Index: src/impl/java/org/wyona/yarep/impl/repo/vfs/SplitPathConfig.java
===================================================================
--- src/impl/java/org/wyona/yarep/impl/repo/vfs/SplitPathConfig.java	(revision 0)
+++ src/impl/java/org/wyona/yarep/impl/repo/vfs/SplitPathConfig.java	(revision 0)
@@ -0,0 +1,266 @@
+package org.wyona.yarep.impl.repo.vfs;
+
+public class SplitPathConfig {
+    private String[] includepaths = new String[0];
+    private int splitparts;
+    private int splitlength;
+    private String escapeChar = "+";
+    
+    /**
+     * Split Path Configuration: 
+     * @param includepaths : if an includepath is configured, e.g. "/base/" then everything after "/base/" gets split, except for the file extension!
+     * @param splitparts max subdirectories from the base path: e.g. base/xx/xx/xx/xx/restofpath : splitparts = 4
+     * @param splitlength the length of the additional subdirectories, e.g. base/xx/xx/xx/xx/restofpath : length=2
+     */
+    public SplitPathConfig(String[] includepaths, int splitparts, int splitlength) {
+        this.includepaths = includepaths;
+        this.splitparts = splitparts;
+        this.splitlength = splitlength;
+    }
+    
+    /**
+     * Creates a config without any include paths
+     */
+    public SplitPathConfig() {
+    }
+    
+    /**
+     * Splits a String such that the result can be used as a repo path for a tree-like repo structure.
+     * The original string can be shorter than splitparts * partlength
+     * The file extension does not get split.
+     * Slashes in the original string are escaped if they are in the range of the split area (splitparts * partlength):
+     * The escape character is currently hardcoded and is "+": a slash gets replaced by "+-" and the plus itself by "++".
+     *
+     * Example for splitparts = 2 and partlength = 3:
+     * hugoboss.xml --> hug/obo/ss.xml
+     * hugo.xml --> hug/o.xml
+     * hug.xml --> hug.xml
+     * hugo/boss.xml --> gets stored as hug/o+-/boss.xml
+     * hug/oboss.xml --> gets stored as hug/+-o/boss.xml
+     * hugo+boss.xml --> gets stored as hug/o++/boss.xml
+     * hugobos/s.xml --> gets stored as hug/obo/s/s.xml
+     * hugobo/ss.xml --> (special case: actually hugobo could be split into hug/obo/ ... but as the next character in the original string is a "/", we do not allow this in the split string, because "//" can not get unsplit anymore.
+     *                   So this becomes hug/obo/+-ss.xml
+     * And a mixed example with the escape character:
+     * hu/go+bo/ss.xml --> hu+/-go/++bo/ss.xml (Note that the first slash in the original string got replaced by +- and then the actual slash has been inserted!)
+     *
+     * If the strings length is shorter than parts * partslength, then as many
+     * parts as possible are split, e.g.
+     * in:  foobar, 2, 5
+     * out: fooba/r
+     * in:  lorem, 3, 10
+     * out: lorem
+     *
+     * An example with "/" characters:
+     * in:  /foobar/lorem/ipsum.txt, parts = 3, length = 3
+     * out: /foo/bar/-lo/rem/ipsum.txt
+     *
+     * @param path usually the full yarep path of a node, e.g. "/users/chucknorris.xml" or if you have the data repository at "/data" and you add a node to this repository, the path is relative to this repository path, e.g. "/pictures/..." and not "/data/pictures/...
+     * @return split path according to the configured rules
+     */
+    public static String splitPathIfRequired(String path, SplitPathConfig splitPathConfig) {
+        // NOTE: uuid should be a full yarep path, so we can safely remove
+        // the leading slash
+        
+        // check if the given path matches any of the include values
+        // in the configuration
+        boolean include = false;
+        String base = null;
+        for (String s : splitPathConfig.getIncludepaths()) {
+            if (path.startsWith(s)) {
+                include = true;
+                base = s;
+                break;
+            }
+        }
+
+        // return the path unchanged if it doesn't match
+        // any of the include values
+        if (!include) {
+            return path;
+        }
+        
+        // remove the leading base string, will be added again later
+        path = path.substring(base.length(), path.length());
+        // we do not want to split the file ending (e.g. ".xml")
+        String suffix = "";
+        if (path.contains(".")) {
+            suffix = path.substring(path.lastIndexOf("."));
+            path = path.substring(0, path.lastIndexOf("."));
+        }
+        int splitparts = splitPathConfig.getSplitparts();
+        int splitlength = splitPathConfig.getSplitlength();
+        
+        // replace "/" characters where needed
+        if (path.length() <= splitparts * splitlength) {
+            path = path.replaceAll("\\+", "++");
+            path = path.replaceAll("/", "+-");
+        } else {
+            path = path.replaceAll("\\+", "++");
+            path = String.format("%s%s",
+                    path.substring(0, splitparts * splitlength).replaceAll("/", "+-"),
+                    path.substring(splitparts * splitlength));
+        }
+
+        // now do the actual splitting
+        StringBuffer splitPath = new StringBuffer(path);
+        int slashIndex = splitlength;
+        int numberOfSlashesInserted = 0;
+        // slashIndex < path length + number of already inserted slashes: with each inserted slash, the path gets one char longer...
+        while (slashIndex < (path.length() + numberOfSlashesInserted) && numberOfSlashesInserted < splitparts) {
+            splitPath.insert(slashIndex, "/");
+            slashIndex = slashIndex + splitlength + 1; // +1 because the inserted slash
+            numberOfSlashesInserted++;
+        }
+        path = base + splitPath.toString();
+        
+//        ORIGINAL CODE FROM PREVIOUS SPLIT IMPL : I found this too complex and hard to understand.       
+//        int len = path.length();
+//        int pos = 0;
+//        String out = "";
+//
+//        int partc = 0;
+//        int w;
+//        while (len > 0 && partc < splitparts) {
+//            partc++;
+//            if (len < splitlength) {
+//                w = len;
+//            } else {
+//                w = splitlength;
+//            }
+//            out += path.substring(pos, pos + w);
+//            pos += w;
+//            len -= w;
+//
+//            if (len > 0) {
+//                out += "/";
+//            }
+//        }
+//
+//        // append remainder
+//        if (len > 0) {
+//            out += path.substring(pos, pos + len);
+//        }
+//
+//        // finally, add the leading zero again and return the new path
+//        path = base + out;
+        
+        
+        if (path.contains("//")) {
+            path = path.replaceAll("//", "/+-");
+        }
+        // and we add the suffix again
+        path = path + suffix;
+
+        return path;
+    }
+    
+    public static String unsplitPathIfRequired(String path, SplitPathConfig splitPathConfig) {
+        boolean include = false;
+        String base = "";
+        for (String s : splitPathConfig.getIncludepaths()) {
+            if (path.startsWith(s)) {
+                include = true;
+                base = s;
+                break;
+            }
+        }
+
+        if (!include) {
+            return path;
+        }
+        // remove the leading base string, will be added again later
+        path = path.substring(base.length(), path.length());
+        int splitparts = splitPathConfig.getSplitparts();
+        int splitlength = splitPathConfig.getSplitlength();
+
+        // we know that each inserted "/" in the split area must be removed; afterwards every "+-" becomes a slash and every "++" becomes a plus.
+        
+        // the area where we apply the logic is the original length (splitparts * splitlength) plus one char for each splitpart ("/")  
+        int splitLength = (splitparts * splitlength)+splitparts+1;
+        if (path.length()<splitLength) {
+            splitLength = path.length();
+        }
+        // remove all slashes
+        path = path.substring(0, splitLength).replaceAll("/", "")+path.substring(splitLength);
+        
+        // a simple replacement of (++ -> +) and (+- -> /) does not work because they must be replaced from left to right.
+        StringBuffer convertedPath = new StringBuffer("");
+        char current;
+        char next;
+        int i = 0;
+        for (; i < path.length()-1; i++) {
+            current = path.charAt(i);
+            next = path.charAt(i+1);
+            if (current == '+' && next == '+') {
+                convertedPath.append("+");
+                i++; // skip next char because we found a token!
+            } else if (current == '+' && next == '-') {
+                convertedPath.append("/");
+                i++; // skip next char because we found a token!
+            } else {
+                convertedPath.append(current);
+            }
+        }
+        if (i == path.length()-1) {
+            convertedPath.append(path.charAt(i));
+        }        
+        
+//        // all original pluses are created
+//        path = path.replaceAll("(\\+\\+)", "+");
+//        // all original slashes are created (single + means slash)
+//        path = path.replaceAll("(\\+-)", "/");
+        path = base + convertedPath.toString();
+        return path;
+    }
+    
+    public static boolean isIncludePath(String path, SplitPathConfig splitPathConfig) {
+        boolean isIncludePath = false;
+        // currently the configuration of split include paths is like "/path/", therefore we have to add a slash if it is missing, otherwise it does not match
+        if (!path.endsWith("/")) {
+            path = path + "/";
+        }
+        for (String s : splitPathConfig.getIncludepaths()) {
+            if (path.startsWith(s)) {
+                isIncludePath = true;
+                break;
+            }
+        }
+        return isIncludePath;
+    }
+
+    
+
+    public String[] getIncludepaths() {
+        return includepaths;
+    }
+
+    public void setIncludepaths(String[] includepaths) {
+        this.includepaths = includepaths;
+    }
+
+    public int getSplitparts() {
+        return splitparts;
+    }
+
+    public void setSplitparts(int splitparts) {
+        this.splitparts = splitparts;
+    }
+
+    public int getSplitlength() {
+        return splitlength;
+    }
+
+    public void setSplitlength(int splitlength) {
+        this.splitlength = splitlength;
+    }
+
+    public String getEscapeChar() {
+        return escapeChar;
+    }
+
+    public void setEscapeChar(String separator) {
+        this.escapeChar = separator;
+    }
+    
+}


More information about the Yanel-development mailing list