HBase advanced features: filters (1)

Level 1: Use filters to query data in specified rows

Knowledge points

1. Steps to use filters:

(1) Create a filter: RowFilter(CompareOperator op,ByteArrayComparable rowComparator). The first parameter receives the comparison operation object, and the second parameter receives the condition.

The first parameter has many values to match various scenarios. The value table is as follows:

Operation Description
CompareOperator.LESS Match values less than the set value
CompareOperator.LESS_OR_EQUAL Match values less than or equal to the set value
CompareOperator.EQUAL Match a value equal to the set value
CompareOperator.NOT_EQUAL Match values that are not equal to the set value
CompareOperator.GREATER_OR_EQUAL Match values that are greater than or equal to the set value
CompareOperator.GREATER Match values greater than the set value
CompareOperator.NO_OP Exclude all values

(2) Set filters.

Programming requirements

Please complete the function query(String tName). The data you need to query is as follows. The table name will be passed in as a parameter of the method:

  • Query the gender column of the basic_info column family for the row whose row key equals 2018;

  • Query the college column of the school_info column family for rows whose row key is greater than 2018;

  • Query the name column of the basic_info column family for rows whose row key is less than or equal to 2020.

package step1;

import java.io.IOException;

import org.apache.hadoop.cli.util.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.*;

public class Task {

public void query(String tName) throws Exception {
/********* Begin *********/
Configuration config = new Configuration();
Connection conn = ConnectionFactory.createConnection(config);
TableName tableName = TableName.valueOf(tName);
        Table table = conn.getTable(tableName);

        Scan scan1=new Scan();
        scan1.addColumn(Bytes.toBytes("basic_info"), Bytes.toBytes("gender"));
Filter filter1 = new RowFilter(CompareOperator.EQUAL,new BinaryComparator(Bytes.toBytes("2018")));
        scan1.setFilter(filter1);
        ResultScanner scanner1 = table.getScanner(scan1);
 System.out.println("row:2018");
 for (Result result : scanner1) {
 for(Cell cell : result.listCells()){
 System.out.println("basic_info:gender " + new String(CellUtil.cloneValue(cell),"utf-8") );
 }
 }
        scanner1.close();
\t\t
Scan scan2=new Scan();
        scan2.addColumn(Bytes.toBytes("school_info"), Bytes.toBytes("college"));
Filter filter2 = new RowFilter(CompareOperator.GREATER,new BinaryComparator(Bytes.toBytes("2018")));
        scan2.setFilter(filter2);
        ResultScanner scanner2 = table.getScanner(scan2);
 for (Result result : scanner2) {
            System.out.println("row:" + new String(result.getRow(),"utf-8"));
 for(Cell cell : result.listCells()){
 System.out.println("school_info:college " + new String(CellUtil.cloneValue(cell),"utf-8") );
 }
 }
        scanner2.close();
\t\t
        Scan scan3=new Scan();
        scan3.addColumn(Bytes.toBytes("basic_info"), Bytes.toBytes("name"));
Filter filter3 = new RowFilter(CompareOperator.LESS_OR_EQUAL,new BinaryComparator(Bytes.toBytes("2020")));
        scan3.setFilter(filter3);
        ResultScanner scanner3 = table.getScanner(scan3);
 for (Result result : scanner3) {
            System.out.println("row:" + new String(result.getRow(),"utf-8"));
 for(Cell cell : result.listCells()){
 System.out.println("basic_info:name " + new String(CellUtil.cloneValue(cell),"utf-8") );
 }
 }
        scanner3.close();
\t\t
conn.close();
/********* End *********/
}
}

Level 2: Use regular expressions and substrings to match row keys

Knowledge points

Comparator subclasses

Comparator Description
BinaryComparator Use Bytes.compareTo() to compare the current value with the threshold
BinaryPrefixComparator Similar to the above, but start prefix matching from the left end
NullComparator Does not match, only determines whether the current value is null
BitComparator Perform bit-level comparison through the bitwise AND (AND), OR (OR), and XOR (XOR) operations provided by the BitwiseOp class
RegexStringComparator (regular expression comparator) Matches the data in the table against a regular expression that is supplied when the comparator is instantiated
SubstringComparator (substring comparator) Treats the threshold and the data in the table as String instances, and matches strings through the contains() operation

Programming requirements

Query the data in table t2_student_table. Requirements are as follows:

  • Query the row key starting with 1 and ending with 9, and output the values of all columns in the row;

  • Query the row key containing 231 and output the values of all columns in the row.

package step2;

import java.io.IOException;

import org.apache.hadoop.cli.util.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.*;
import org.apache.hadoop.hbase.filter.SubstringComparator;

public class Task {

public void query() throws Exception {
/********* Begin *********/
Configuration config = new Configuration();
Connection conn = ConnectionFactory.createConnection(config);
TableName tablename=TableName.valueOf("t2_student_table");
        Table table=conn.getTable(tablename);

        Scan scan1=new Scan();
        Filter filter1 = new RowFilter(CompareOperator.EQUAL,new RegexStringComparator("1.*9$")); //Query the row keys starting with 1 and ending with 9
        scan1.setFilter(filter1);
        ResultScanner scanner1 = table.getScanner(scan1);
        for (Result result : scanner1) {
 System.out.println("row:" + new String(result.getRow(),"utf-8"));
 for(Cell cell : result.listCells()){
 String family = Bytes.toString(CellUtil.cloneFamily(cell));
 String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
 String value = Bytes.toString(CellUtil.cloneValue(cell));
 System.out.println(family + ":" + qualifier + " " + value);
 }
 }
 scanner1.close();
\t\t
Scan scan2=new Scan();
        Filter filter2 = new RowFilter(CompareOperator.EQUAL,new SubstringComparator("231")); //Query the row key containing 231
        scan2.setFilter(filter2);
        ResultScanner scanner2 = table.getScanner(scan2);
        for (Result result : scanner2) {
 System.out.println("row:" + new String(result.getRow(),"utf-8"));
 for(Cell cell : result.listCells()){
 String family = Bytes.toString(CellUtil.cloneFamily(cell));
 String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
 String value = Bytes.toString(CellUtil.cloneValue(cell));
 System.out.println(family + ":" + qualifier + " " + value);
 }
 }
 scanner2.close();
\t
conn.close();
/********* End *********/
}
}

Level 3: column family filter, value filter, column name filter

Programming requirements

Use the filter to complete the following query operations on the table t3_student_table:

  • Query all columns of column family school_info in row key 1019 and output the values;

  • Query all columns whose column names contain the letter c in the row key 2020, and output the values;

  • Query all values in the table that contain the character 张 (rendered as "sheet" by machine translation) and output them.

package step3;

import java.io.IOException;

import org.apache.hadoop.cli.util.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.*;

import org.apache.hadoop.hbase.filter.SubstringComparator;

public class Task {

public void query() throws Exception {
/********* Begin *********/
Configuration config = new Configuration();
Connection conn = ConnectionFactory.createConnection(config);
TableName tableName = TableName.valueOf(Bytes.toBytes("t3_student_table"));
 Table table = conn.getTable(tableName);
\t\t
Filter filter1 = new FamilyFilter(CompareOperator.EQUAL,new BinaryComparator(Bytes.toBytes("school_info"))); //Column family filter
 Get get1 = new Get(Bytes.toBytes("1019"));
 get1.setFilter(filter1);
 Result result1 = table.get(get1);
 System.out.println("row:" + new String(result1.getRow(),"utf-8"));
 for(Cell cell : result1.listCells()){
 String family = Bytes.toString(CellUtil.cloneFamily(cell));
 String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
 String value = Bytes.toString(CellUtil.cloneValue(cell));
 System.out.println(family + ":" + qualifier + " " + value);
 }

        Filter filter2 = new QualifierFilter(CompareOperator.EQUAL,
 new SubstringComparator("c")); //Column name filter
 Get get2 = new Get(Bytes.toBytes("2020"));
 get2.setFilter(filter2);
 Result result2 = table.get(get2);
 System.out.println("row:" + new String(result2.getRow(),"utf-8"));
 for(Cell cell : result2.listCells()){
 String family = Bytes.toString(CellUtil.cloneFamily(cell));
 String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
 String value = Bytes.toString(CellUtil.cloneValue(cell));
 System.out.println(family + ":" + qualifier + " " + value);
 }
\t\t
Scan scan3 = new Scan();
 Filter filter3 = new ValueFilter(CompareOperator.EQUAL, new SubstringComparator("张")); //Value filter
 scan3.setFilter(filter3);
 ResultScanner scanner3 = table.getScanner(scan3);
 for (Result result : scanner3) {
 System.out.println("row:" + new String(result.getRow(),"utf-8"));
 for(Cell cell : result.listCells()){
 String family = Bytes.toString(CellUtil.cloneFamily(cell));
 String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
 String value = Bytes.toString(CellUtil.cloneValue(cell));
 System.out.println(family + ":" + qualifier + " " + value);
 }
 }
        scanner3.close();
conn.close();
/********* End *********/
}
}