Tuesday, November 13, 2012

Example of Storing User and Friends Information in Accumulo

This example shows one way to store user and friend information in one Accumulo table. Each friend category becomes a Column Family and each friend's user id becomes a Column Qualifier. After the inserts a small query is run to loop over old friends.


package com.codebits.accumulo;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;

/**
 * Demonstrates storing user details and friend lists in a single Accumulo table.
 *
 * <p>Each user is one row. Descriptive attributes live under {@code info:*} column
 * families with an empty qualifier; each friend category ({@code friend:old},
 * {@code friend:new}) is a column family whose qualifiers are the friends' user ids.
 */
public class UserAndFriendsDriver {

  /**
   * Creates a mock Accumulo table, inserts two users with their friends, then prints
   * every "old friend" relationship found by scanning the {@code friend:old} family.
   *
   * @param args unused
   */
  public static void main(String[] args) throws IOException, AccumuloException, AccumuloSecurityException, TableExistsException, TableNotFoundException {
    Instance mock = new MockInstance("development");
    Connector connector = mock.getConnector("root", "password".getBytes());
    connector.tableOperations().create("TABLEA");

    // Arguments after the table name follow the Accumulo 1.4 signature:
    // (maxMemory, maxLatency, maxWriteThreads).
    BatchWriter writer = connector.createBatchWriter("TABLEA", 10000000, 10000, 5);

    Mutation john = new Mutation(new Text("john"));
    john.put("info:name", "", "john henry");
    john.put("info:gender", "", "male");
    john.put("friend:old", "mark", "");
    writer.addMutation(john);

    Mutation mary = new Mutation(new Text("mary"));
    // The original listing stored "mark wiggins" here, which did not match the row id.
    mary.put("info:name", "", "mary wiggins");
    mary.put("info:gender", "", "female");
    mary.put("friend:new", "mark", "");
    mary.put("friend:old", "lucas", "");
    mary.put("friend:old", "aaron", "");
    writer.addMutation(mary);

    writer.close();

    // Restrict the scan to rows between "a" and "z" and to the old-friends family only.
    Scanner scanner = connector.createScanner("TABLEA", new Authorizations());
    scanner.setRange(new Range("a", "z"));
    scanner.fetchColumnFamily(new Text("friend:old"));
    for (Map.Entry<Key, Value> entry : scanner) {
      Key key = entry.getKey();
      System.out.println("Old Friends: " + key.getRow() + " -> " + key.getColumnQualifier());
    }
  }
}

Monday, November 12, 2012

How Can I Use Reverse Sort On Integer (or Long) Accumulo Keys?

This entry will be very similar to my last. Here is code with debugging (i.e., the original key is retained) so that you can see the reverse sort is working.
package com.codebits.accumulo;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;

/**
 * Demonstrates reverse-sorting integer row ids in Accumulo by prefixing each row with
 * {@code Integer.MAX_VALUE - value}. The original value is appended after the prefix
 * purely for debugging, so the printed rows show both halves.
 */
public class ReversingIntSortOrderWithMockInstanceDriver {

  /**
   * Builds an 8-byte row id: four big-endian bytes of {@code Integer.MAX_VALUE - value}
   * followed by four big-endian bytes of {@code value} itself.
   *
   * <p>NOTE(review): the subtraction overflows for negative {@code value}; the example
   * only uses small positive integers.
   *
   * @param value the integer key to encode
   * @return the 8-byte row id (reverse-sort prefix + original key)
   */
  static byte[] convert(int value) {
    // ByteBuffer writes big-endian by default, matching the two separate 4-byte
    // buffers that this replaces.
    return ByteBuffer.allocate(8)
        .putInt(Integer.MAX_VALUE - value)
        .putInt(value)
        .array();
  }

  /**
   * Renders a byte array as lowercase hex.
   *
   * <p>Each byte is formatted with {@code %x}, which does NOT zero-pad: the byte
   * {@code 0x05} prints as {@code "5"}, not {@code "05"}. This is kept deliberately so
   * the output matches the sample listing that accompanies this example.
   *
   * @param ba bytes to render
   * @return concatenated, unpadded hex digits
   */
  public static String toHexString(byte[] ba) {
    StringBuilder hex = new StringBuilder();
    for (byte b : ba) {
      hex.append(String.format("%x", b));
    }
    return hex.toString();
  }

  /**
   * Inserts the keys 5..1 into a mock table and prints the stored row ids in scan
   * order, showing that the largest original key now sorts first.
   *
   * @param args unused
   */
  public static void main(String[] args) throws IOException, AccumuloException, AccumuloSecurityException, TableExistsException, TableNotFoundException {
    Instance mock = new MockInstance("development");
    Connector connector = mock.getConnector("root", "password".getBytes());
    connector.tableOperations().create("TABLEA");

    BatchWriter writer = connector.createBatchWriter("TABLEA", 10000000, 10000, 5);
    for (int i = 5; i > 0; --i) {
      Mutation m = new Mutation(new Text(convert(i)));
      m.put("cf", "cq", "value");
      writer.addMutation(m);
    }
    writer.close();

    Scanner scanner = connector.createScanner("TABLEA", new Authorizations());
    // Typed entries: with a raw Map.Entry, getKey() returns Object and the
    // assignment to Key does not compile.
    for (Map.Entry<Key, Value> entry : scanner) {
      Text row = entry.getKey().getRow();
      // Text.getBytes() exposes the backing array, which is only valid up to
      // getLength(); trim before printing.
      System.out.println(toHexString(Arrays.copyOf(row.getBytes(), row.getLength())));
    }
  }
}
This code produces this output:
7ffffffa0005
7ffffffb0004
7ffffffc0003
7ffffffd0002
7ffffffe0001
I'll leave it to the reader to remove the debugging code.

How Can I Use Reverse Sort On Generic Accumulo Keys?

This note shows how to reverse the sort order of Accumulo keys (more precisely, of the row ids). As you might know, the standard sort order is lexical. This first example shows a standard usage of a mock Accumulo instance. Notice that the records are inserted in reverse order (5, 4, 3, 2, 1) but are printed in lexical order.

public static void main(String[] args) throws Exception {
  // connect to a mock Accumulo instance.
  Instance mock = new MockInstance("development");
  Connector connector = mock.getConnector("root", "password".getBytes());
  connector.tableOperations().create("TABLEA");
  BatchWriter wr = connector.createBatchWriter("TABLEA", 10000000, 10000, 5);

  // insert five records in reverse order.
  for (int i = 5; i > 0; --i) {
    byte[] key = ("row_" + String.format("%04d", i)).getBytes();
    Mutation m = new Mutation(new Text(key));
    m.put("cf_" + String.format("%04d", i), "cq_" + 1, "val_" + 1);
    wr.addMutation(m);
  }
  wr.close();

  // display records; notice they are lexically sorted.
  Scanner scanner = connector.createScanner("TABLEA", new Authorizations());
  Iterator<Map.Entry&lyKey, Value>> iterator = scanner.iterator();
  while (iterator.hasNext()) {
    Map.Entry entry = iterator.next();
    Key key = entry.getKey();
    System.out.println("ROW ID: " + key.getRow());
  }
 }

The above code displays:

ROW ID: row_0001
ROW ID: row_0002
ROW ID: row_0003
ROW ID: row_0004
ROW ID: row_0005

Reverse sorting is accomplished by subtracting each byte in the row id from 255 as shown in the example below.

/**
 * Teaching version of the reverse-sort encoding: returns an array twice as long as
 * the input, holding each input byte subtracted from 255 followed by an untouched
 * copy of the input.
 *
 * @param row the original row id bytes (not modified)
 * @return complemented bytes followed by the original bytes
 */
static byte[] convert(byte[] row) {
  final int n = row.length;
  byte[] doubled = new byte[n * 2];
  int pos = 0;
  // first half: each byte subtracted from 255
  for (byte b : row) {
    doubled[pos++] = (byte) (0xFF - b);
  }
  // second half: the original bytes, kept so the reversal can be verified by eye
  System.arraycopy(row, 0, doubled, n, n);
  return doubled;
}

 public static void main(String[] args) throws Exception {
  // connect to a mock Accumulo instance.
  Instance mock = new MockInstance("development");
  Connector connector = mock.getConnector("root", "password".getBytes());
  connector.tableOperations().create("TABLEA");
  BatchWriter wr = connector.createBatchWriter("TABLEA", 10000000, 10000, 5);

  // insert five records in reverse order.
  for (int i = 5; i > 0; --i) {
    byte[] key = ("row_" + String.format("%04d", i)).getBytes();
    byte[] reverse_key = convert(key);
    Mutation m = new Mutation(new Text(reverse_key));
    m.put("cf_" + String.format("%04d", i), "cq_" + 1, "val_" + 1);
    wr.addMutation(m);
  }
  wr.close();

  // display records; notice they are lexically sorted.
  Scanner scanner = connector.createScanner("TABLEA", new Authorizations());
  Iterator<Map.Entry&lyKey, Value>> iterator = scanner.iterator();
  while (iterator.hasNext()) {
    Map.Entry entry = iterator.next();
    Key key = entry.getKey();
    System.out.println("ROW ID: " + key.getRow());
  }
 }

The above code displays:

ROW ID: ��������row_0005
ROW ID: ��������row_0004
ROW ID: ��������row_0003
ROW ID: ��������row_0002
ROW ID: ��������row_0001

It's important to note that for teaching purposes, the key is stored once in reverse format and again normally. Thus when displayed you can verify that the key is stored in reverse order. Normally the convert method is used like this:

/**
 * Returns a new array in which every byte of {@code row} has been subtracted from
 * 255, so that lexical ordering of the result is the reverse of the input's.
 *
 * @param row the original row id bytes (not modified)
 * @return a fresh array of the same length holding the complemented bytes
 */
static byte[] convert(byte[] row) {
  byte[] flipped = row.clone();
  for (int k = flipped.length - 1; k >= 0; k--) {
    // 255 - b, truncated to a byte, is the bitwise complement of b
    flipped[k] = (byte) ~flipped[k];
  }
  return flipped;
}

For some use cases, you can convert the row bytes in place:

/**
 * In-place variant: overwrites every byte of {@code row} with 255 minus its value and
 * returns the same array instance.
 *
 * @param row the row id bytes; modified by this call
 * @return {@code row} itself, after conversion
 */
static byte[] convert(byte[] row) {
  int idx = row.length;
  while (idx-- > 0) {
    row[idx] = (byte) (0xFF - row[idx]);
  }
  return row;
}