1、Hbase葱岭探秘--过滤器Api Hbase中提供了许多的过滤器接口,以此来对数据进行过滤,使得查询出想要的数据。 行过滤器 针对行信息进行过滤,参数中可以采用前缀匹配、按位与、或、异或以及子串匹配等匹配的方式。同时可以控制EQUAL、NOT_EQUAL选项进行控制筛选数据的条件。 /** * 行过滤器 BinaryComparator NullComparator:是不是空值 * BitComparator:通过BitwiseOp类提供的按位与、或、异或操作进行位级别比较 RegexStringComparator:正则匹
2、配 * SubStringComparator:子串是不是包含进行匹配 */ private static void testRowFilter() { try { HTable table = new HTable(config, "testtable"); Scan scan = new Scan(); scan.addColumn("col1".getBytes(), "name".getBytes()); // 行过滤器
3、 Filter filter = new RowFilter(CompareOp.EQUAL, new BinaryComparator("row2".getBytes())); scan.setFilter(filter); ResultScanner result = table.getScanner(scan); for (Result res : result) { log.info("行过滤器>" + res)
4、 } // 正则的行过滤器 Filter filter2 = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".*.2")); scan.setFilter(filter2); ResultScanner resultRegx = table.getScanner(scan); for (Result res : result
5、Regx) { log.info("正则>" + res); } Filter filterSubString = new RowFilter(CompareOp.EQUAL, new SubstringComparator("w2")); scan.setFilter(filterSubString); ResultScanner resultSubString = table.getScanner(scan)
6、 for (Result res : resultSubString) { log.info("子串>" + res); } table.close(); } catch (IOException e) { log.error(e); } 列族过滤器 根据列族的数据进行筛选,形式和上面的行过滤器类似,通过控制相应的参数中的筛选的条件进行相应的筛选。 /** * 列族过滤器 *
7、/ private static void testFamlyFilter() { try { HTable table = new HTable(config, "testtable"); Filter filter = new FamilyFilter(CompareOp.EQUAL, new BinaryComparator("col1".getBytes())); Scan scan = new Scan("row2".getByt
8、es(), filter); ResultScanner result = table.getScanner(scan); for (Result res : result) { log.info(res); } Filter filterNull = new FamilyFilter(CompareOp.EQUAL, new RegexStringComparator(".*.1"));
9、 Scan scanNull = new Scan("row2".getBytes(), filterNull); scanNull.addFamily("col1".getBytes()); ResultScanner resultNull = table.getScanner(scanNull); if (resultNull != null) { for (Result res : resultNull) { log.info(res
10、); } } else { log.info("null"); } table.close(); } catch (IOException e) { log.error(e); } 列名过滤器 和上面几个过滤器类似,这里是根据列进行筛选,设置相应的条件后就可以进行相应的筛选了。 /** * 列名过滤器 */ public static
11、 void testColumFilter() { try { HTable table = new HTable(config, "testtable"); Filter filter = new QualifierFilter(CompareOp.EQUAL, new BinaryComparator("name".getBytes())); Scan scan = new Scan("row2".getBytes(), filter);
12、 ResultScanner result = table.getScanner(scan); for (Result res : result) { log.info(res); } Get get = new Get("row2".getBytes()); get.setFilter(filter); Result resultGet = table.get(get); log.i
13、nfo(resultGet); table.close(); } catch (IOException e) { log.info(e); } 参考列过滤器根据列族和列限定符进行筛选,返回与参考列相同时间戳的行的所有键值对。 /** * 参考列过滤器 */ public static void testDependentColumnFilter() { try { HTable table = new HT
14、able(config, "testtable"); Filter filter = new DependentColumnFilter("col1".getBytes(), "name".getBytes(), false); Scan scan = new Scan(); scan.setFilter(filter); ResultScanner resu = table.getScanner(scan); for
15、Result result : resu) { log.info(result); } Get get = new Get("row2".getBytes()); get.setFilter(filter); Result result = table.get(get); log.info(result); table.close(); } catch (IOException
16、e) { log.error(e); } } 单列过滤器 通过一列的值进行判断是不是需要进行过滤。 /** * 单列过滤器 */ public static void testSingleColumnValueFilter() { try { HTable table = new HTable(config, "testtable"); Filter filter = new SingleColumnValu
17、eFilter("col1".getBytes(), "name".getBytes(), CompareOp.EQUAL, "wy".getBytes()); Scan scan = new Scan(); scan.setFilter(filter); ResultScanner result = table.getScanner(scan); for (Result res : result) { log.in
18、fo(res); } Get get = new Get("row2".getBytes()); get.setFilter(filter); Result resultGet = table.get(get); log.info(resultGet); table.close(); } catch (IOException e) { log.info(e); }
19、前缀过滤器 根据前缀进行匹配行键的数据,本例中给出的是以row为前缀的行的数据。 /** * 前缀过滤器 */ public static void testPrefixFilter() { try { HTable table = new HTable(config, "testtable"); Filter filter = new PrefixFilter("row".getBytes()); Scan scan = new Scan();
20、 scan.setFilter(filter); ResultScanner result = table.getScanner(scan); for (Result res : result) { log.info("res>" + res); } Get get = new Get("row2".getBytes()); Result resultGet = table.get(get);
21、 log.info("get>" + resultGet); table.close(); } catch (IOException e) { log.info(e); } } 分页过滤器 通过pageFilter设置一页中数据的条数,注意,在重新设置起始行的时候,要使得新的行和数据库中有区别,否则,会死循环无法停止。 /** * 分页过滤器 */ public static void testPageFilter() {
22、 try { HTable table = new HTable(config, "testtable"); Filter filter = new PageFilter(10); int totalRows = 0; byte[] lastRow = null; Scan scan = new Scan(); while (true) { scan.setFilter(fil
23、ter); if (lastRow != null) { // 加上0后表示新的开始防止row的内容一样造成死循环 byte[] startRow = Bytes.add(lastRow, POSTFIX); scan.setStartRow(startRow); } ResultScanner resultScan = table.getScanner(scan)
24、 int localRows = 0; Result result = resultScan.next(); while (result != null) { log.info(result); localRows++; totalRows++; lastRow = result.getRow();
25、 result = resultScan.next(); } if (localRows == 0) break; } log.info(totalRows); table.close(); } catch (IOException e) { log.info(e); } /** * 列分页过滤
26、 */ public static void testColumnPaginationFilter() { try { HTable table = new HTable(config, "testtable"); Filter filter = new ColumnPaginationFilter(5, 10); Scan scan = new Scan(); scan.setFilter(filter); Res
27、ultScanner result = table.getScanner(scan); for (Result res : result) { log.info(res); } table.close(); } catch (IOException e) { log.info(e); } Skip过滤器 与ValueFilter结合使用,如果一行中某一列不符合要求的话直接被过滤掉。 /**
28、 * 跳过过滤器 */ public static void testSkipFilter() { try { HTable table = new HTable(config, "testtable"); Filter filt = new ValueFilter(CompareOp.NOT_EQUAL, new BinaryComparator("v".getBytes())); Scan scanValue = new S
29、can(); scanValue.setFilter(filt); ResultScanner ress = table.getScanner(scanValue); for (Result result : ress) { log.info("<" + result); } Filter filter = new SkipFilter(filt); Scan scan = new Scan();
30、 scan.setFilter(filter); ResultScanner result = table.getScanner(scan); for (Result res : result) { log.info(">" + res); } table.close(); } catch (IOException e) { log.info(e); } 全匹配过滤器
31、在遇到某个条件之前的数据全部查询出来,直到遇到满足该条件的数据之后结束查询。 /** * 全匹配过滤器 */ public static void testWhileMatch() { try { HTable table = new HTable(config, "testtable"); Filter filt = new RowFilter(CompareOp.NOT_EQUAL, new BinaryComparat
32、or("row6".getBytes())); Scan scan = new Scan(); scan.setFilter(filt); ResultScanner results = table.getScanner(scan); for (Result res : results) { log.info(">" + res); } Filter filter = new WhileMatchF
33、ilter(filt); scan.setFilter(filter); ResultScanner resultScan = table.getScanner(scan); for (Result res : resultScan) { log.info("<" + res); } table.close(); } catch (IOException e) { log.info(e)
34、
}
过滤器组合
可以将上面的多个过滤器放在一个List中,然后以FilterList的形式组合起来进行过滤。
/**
* 过滤器组合
*/
public static void testFilterList() {
List
35、"name".getBytes(), CompareOp.EQUAL, "x".getBytes()); filterList.add(filter1); Filter filter2 = new RowFilter(CompareOp.NOT_EQUAL, new BinaryComparator("row2".getBytes())); filterList.add(filter2); FilterList filters = new FilterList(filterList
36、); Scan scan = new Scan(); scan.setFilter(filters); try { HTable table = new HTable(config, "testtable"); ResultScanner result = table.getScanner(scan); for (Result res : result) { log.info(res); } table.close(); } catch (IOException e) { log.info(e); } }






