Sunday, March 31, 2013

Example using Accumulo's RegExFilter class



package com.affy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.IteratorUtil;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.SortedMapIterator;
import org.apache.accumulo.core.iterators.system.MapFileIterator;
import org.apache.accumulo.core.iterators.user.RegExFilter;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.Logger;

/**
 * Runs Accumulo's {@link RegExFilter} over a small in-memory map so the filter
 * can be tried out without a running Accumulo instance.
 */
public class AccumuloRegExIteratorPlayground {

    // One logger per class is enough; there is no need to look it up per instance.
    private static final Logger log = Logger.getLogger(AccumuloRegExIteratorPlayground.class);

    // Passed to seek() as the column-family set; empty together with inclusive=false.
    private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>();

    /**
     * Builds two sample entries (rows {@code "1111"} and {@code "/1111"}), stacks a
     * {@link RegExFilter} on top of them and logs every entry the filter exposes.
     *
     * <p>The pattern {@code "/.*"} is handed to {@code RegExFilter.setRegexs} as its
     * first term; the remaining terms are {@code null}.
     * NOTE(review): the first term is understood to be the row expression, so only
     * the {@code "/1111"} entry is expected in the log - confirm against the
     * RegExFilter documentation for the Accumulo version in use.
     *
     * @throws IOException propagated from the iterator's init, seek or next calls
     * @throws IllegalArgumentException if the filter rejects the generated options
     */
    public void process() throws IOException {
        final String regularExpression = "/.*";

        final SortedMap<Key, Value> input = new TreeMap<Key, Value>();
        input.put(new Key("1111", "2222", "3333", 0), new Value("4444".getBytes()));
        input.put(new Key("/1111", "2222", "3333", 0), new Value("4444".getBytes()));

        final RegExFilter rei = new RegExFilter();
        final IteratorSetting is = new IteratorSetting(1, RegExFilter.class);
        RegExFilter.setRegexs(is, regularExpression, null, null, null, false);

        // Fail early, and say which options were rejected.
        if (!rei.validateOptions(is.getOptions())) {
            throw new IllegalArgumentException("invalid options: " + is.getOptions());
        }

        // Stand-in environment: only the map-file reader and the configuration are
        // backed by real objects; every other callback throws if it is ever invoked.
        rei.init(new SortedMapIterator(input), is.getOptions(), new IteratorEnvironment() {
            @Override
            public SortedKeyValueIterator<Key, Value> reserveMapFileReader(String mapFileName) throws IOException {
                Configuration conf = CachedConfiguration.getInstance();
                FileSystem fs = FileSystem.get(conf);
                return new MapFileIterator(AccumuloConfiguration.getDefaultConfiguration(), fs, mapFileName, conf);
            }

            @Override
            public AccumuloConfiguration getConfig() {
                return AccumuloConfiguration.getDefaultConfiguration();
            }

            @Override
            public IteratorUtil.IteratorScope getIteratorScope() {
                throw new UnsupportedOperationException("Not supported yet.");
            }

            @Override
            public boolean isFullMajorCompaction() {
                throw new UnsupportedOperationException("Not supported yet.");
            }

            @Override
            public void registerSideChannel(SortedKeyValueIterator<Key, Value> iter) {
                throw new UnsupportedOperationException("Not supported yet.");
            }
        });

        // A default Range plus an empty column-family collection.
        rei.seek(new Range(), EMPTY_COL_FAMS, false);

        while (rei.hasTop()) {
            final Key key = rei.getTopKey();
            final Value value = rei.getTopValue();
            log.info(key + " --> " + value);
            rei.next();
        }
    }

    /**
     * Command-line entry point; creates a playground instance and runs {@link #process()}.
     *
     * @param args ignored
     * @throws IOException propagated from {@link #process()}
     */
    public static void main(final String[] args) throws IOException {
        AccumuloRegExIteratorPlayground driver = new AccumuloRegExIteratorPlayground();
        driver.process();
    }
}

Saturday, March 30, 2013

Testing Your GrepIterator Without Running Accumulo

An example program that runs GrepIterator over an in-memory SortedMap, so no Accumulo instance is needed:

package com.affy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.impl.MasterClient;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.SortedMapIterator;
import org.apache.accumulo.core.iterators.user.GrepIterator;
import org.apache.log4j.Logger;

/**
 * Runs Accumulo's {@link GrepIterator} over a small in-memory map so the
 * iterator can be tried out without a running Accumulo instance.
 */
public class AccumuloGrepIteratorPlayground {

    private static final Logger log = Logger.getLogger(AccumuloGrepIteratorPlayground.class);

    // Typed (rather than raw) so the seek() call below needs no unchecked conversion.
    private static final Collection<ByteSequence> EMPTY_COL_FAMS = new ArrayList<ByteSequence>();

    /**
     * Builds two sample entries (rows {@code "1111"} and {@code "/1111"}), stacks a
     * {@link GrepIterator} configured with the term {@code "/1"} on top of them and
     * logs every entry the iterator exposes.
     *
     * @param args ignored
     * @throws IOException propagated from the iterator's init, seek or next calls
     */
    public static void main(final String[] args) throws IOException {
        final String term = "/1";

        final SortedMap<Key, Value> input = new TreeMap<Key, Value>();
        input.put(new Key("1111", "2222", "3333", 0), new Value("4444".getBytes()));
        input.put(new Key("/1111", "2222", "3333", 0), new Value("4444".getBytes()));

        final GrepIterator grepIterator = new GrepIterator();
        final IteratorSetting iteratorSetting = new IteratorSetting(1, GrepIterator.class);
        GrepIterator.setTerm(iteratorSetting, term);

        // No IteratorEnvironment is supplied here (null).
        // NOTE(review): confirm GrepIterator tolerates a null environment in the
        // Accumulo version being used.
        grepIterator.init(new SortedMapIterator(input), iteratorSetting.getOptions(), null);

        // A default Range plus an empty column-family collection.
        grepIterator.seek(new Range(), EMPTY_COL_FAMS, false);

        while (grepIterator.hasTop()) {
            final Key key = grepIterator.getTopKey();
            final Value value = grepIterator.getTopValue();
            log.info(key + " --> " + value);
            grepIterator.next();
        }

    }
}

The pom.xml looks like this:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven build descriptor for the example project; produces a jar. -->
    <modelVersion>4.0.0</modelVersion>

    <!-- Project coordinates. -->
    <groupId>com.mycompany</groupId>
    <artifactId>AccumuloGrepIterator</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>AccumuloGrepIterator</name>
    <url>http://maven.apache.org</url>

    <properties>
        <!-- Source files are read as UTF-8. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Accumulo core library, pinned to 1.4.2. -->
        <dependency>
            <groupId>org.apache.accumulo</groupId>
            <artifactId>accumulo-core</artifactId>
            <version>1.4.2</version>
        </dependency>
        <!-- Hadoop common library, pinned to 0.23.6.
             NOTE(review): presumably required by accumulo-core at runtime; confirm
             this Hadoop line is compatible with Accumulo 1.4.2. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>0.23.6</version>
        </dependency>        
        <!-- JUnit, available on the test classpath only. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>


Wednesday, March 27, 2013

Running Accumulo Unit Tests On Windows

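Accumulo is not designed to run under Windows, and neither are its unit tests. However, if you persist in this foolishness, add the following profile to the <profiles> section of each indicated pom.xml file. Each profile activates only on Windows and tells the Surefire plugin to skip the listed tests.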

core/pom.xml

    <!-- core/pom.xml: switched on only when the build runs on a Windows host;
         tells Surefire to leave out the test classes listed below. -->
    <profile>
        <id>skipped_tests_on_windows</id>
        <activation>
            <activeByDefault>false</activeByDefault>
            <os>
                <family>Windows</family>
            </os>
        </activation>
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <configuration>
                        <excludes>
                            <exclude>**/AccumuloFileOutputFormatTest.java</exclude>
                            <exclude>**/AccumuloFileInputFormatTest.java</exclude>
                            <exclude>**/AccumuloOutputFormatTest.java</exclude>
                            <exclude>**/AccumuloRowInputFormatTest.java</exclude>
                            <exclude>**/AccumuloInputFormatTest.java</exclude>
                            <exclude>**/MockTableOperationsTest.java</exclude>
                            <exclude>**/AggregatingIteratorTest.java</exclude>
                            <exclude>**/IterUtilTest.java</exclude>
                            <exclude>**/CombinerTest.java</exclude>
                            <exclude>**/FormatterCommandTest.java</exclude>
                        </excludes>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </profile>

examples/simple/pom.xml

    <!-- examples/simple/pom.xml: switched on only when the build runs on a Windows
         host; tells Surefire to leave out the test class listed below. -->
    <profile>
        <id>skipped_tests_on_windows</id>
        <activation>
            <activeByDefault>false</activeByDefault>
            <os>
                <family>Windows</family>
            </os>
        </activation>
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <configuration>
                        <excludes>
                            <exclude>**/ChunkInputFormatTest.java</exclude>
                        </excludes>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </profile>

proxy/pom.xml

    <!-- proxy/pom.xml: switched on only when the build runs on a Windows host;
         tells Surefire to leave out the test classes listed below.
         Place this profile inside the pom's <profiles> element, i.e. before the
         closing </profiles> tag. -->
    <profile>
        <id>skipped_tests_on_windows</id>
        <activation>
            <activeByDefault>false</activeByDefault>
            <os>
                <family>Windows</family>
            </os>
        </activation>
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <configuration>
                        <excludes>
                            <exclude>**/SimpleTest.java</exclude>
                            <exclude>**/TestProxyInstanceOperations.java</exclude>
                        </excludes>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </profile>

server/pom.xml

    <!-- server/pom.xml: switched on only when the build runs on a Windows host;
         tells Surefire to leave out the test class listed below. -->
    <profile>
        <id>skipped_tests_on_windows</id>
        <activation>
            <activeByDefault>false</activeByDefault>
            <os>
                <family>Windows</family>
            </os>
        </activation>
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <configuration>
                        <excludes>
                            <exclude>**/TableLoadBalancerTest.java</exclude>
                        </excludes>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </profile>

start/pom.xml

    <!-- start/pom.xml: switched on only when the build runs on a Windows host;
         tells Surefire to leave out the test classes listed below. -->
    <profile>
        <id>skipped_tests_on_windows</id>
        <activation>
            <activeByDefault>false</activeByDefault>
            <os>
                <family>Windows</family>
            </os>
        </activation>
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <configuration>
                        <excludes>
                            <exclude>**/AccumuloVFSClassLoaderTest.java</exclude>
                            <exclude>**/ReadOnlyHdfsFileProviderTest.java</exclude>
                            <exclude>**/VfsClassLoaderTest.java</exclude>
                        </excludes>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </profile>

test/pom.xml

    <!-- test/pom.xml: switched on only when the build runs on a Windows host;
         tells Surefire to leave out the test classes listed below. -->
    <profile>
        <id>skipped_tests_on_windows</id>
        <activation>
            <activeByDefault>false</activeByDefault>
            <os>
                <family>Windows</family>
            </os>
        </activation>
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <configuration>
                        <excludes>
                            <exclude>**/MiniAccumuloClusterTest.java</exclude>
                            <exclude>**/ShellServerTest.java</exclude>
                        </excludes>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </profile>