IT技术互动交流平台

Hbase葱岭探秘 过滤器Api

来源:IT165收集  发布日期:2016-12-22 20:36:19

HBase提供了许多过滤器接口,用来对数据进行过滤,从而只查询出想要的数据。

行过滤器

针对行键进行过滤,比较器可以采用二进制比较、前缀匹配、按位与/或/异或、正则匹配以及子串匹配等方式;同时可以通过EQUAL、NOT_EQUAL等比较运算符来控制筛选数据的条件。

/**
     * 行过滤器 BinaryComparator 
        NullComparator:是不是空值
     * BitComparator:通过BitwiseOp类提供的按位与、或、异或操作进行位级别比较 RegexStringComparator:正则匹配
     * SubStringComparator:子串是不是包含进行匹配
     */
    private static void testRowFilter() {
        try {
            HTable table = new HTable(config, 'testtable');

            Scan scan = new Scan();
            scan.addColumn('col1'.getBytes(), 'name'.getBytes());
            // 行过滤器
            Filter filter = new RowFilter(CompareOp.EQUAL,
                    new BinaryComparator('row2'.getBytes()));
            scan.setFilter(filter);

            ResultScanner result = table.getScanner(scan);
            for (Result res : result) {
                log.info('行过滤器>' + res);
            }

            // 正则的行过滤器
            Filter filter2 = new RowFilter(CompareOp.EQUAL,
                    new RegexStringComparator('.*.2'));
            scan.setFilter(filter2);
            ResultScanner resultRegx = table.getScanner(scan);
            for (Result res : resultRegx) {
                log.info('正则>' + res);
            }

            Filter filterSubString = new RowFilter(CompareOp.EQUAL,
                    new SubstringComparator('w2'));
            scan.setFilter(filterSubString);
            ResultScanner resultSubString = table.getScanner(scan);
            for (Result res : resultSubString) {
                log.info('子串>' + res);
            }

            table.close();
        } catch (IOException e) {
            log.error(e);
        }
    }

列族过滤器

根据列族的数据进行筛选,形式和上面的行过滤器类似,通过控制相应的参数中的筛选的条件进行相应的筛选。

/**
     * 列族过滤器
     */
    private static void testFamlyFilter() {

        try {
            HTable table = new HTable(config, 'testtable');

            Filter filter = new FamilyFilter(CompareOp.EQUAL,
                    new BinaryComparator('col1'.getBytes()));
            Scan scan = new Scan('row2'.getBytes(), filter);
            ResultScanner result = table.getScanner(scan);
            for (Result res : result) {
                log.info(res);
            }

            Filter filterNull = new FamilyFilter(CompareOp.EQUAL,
                    new RegexStringComparator('.*.1'));
            Scan scanNull = new Scan('row2'.getBytes(), filterNull);
            scanNull.addFamily('col1'.getBytes());
            ResultScanner resultNull = table.getScanner(scanNull);
            if (resultNull != null) {
                for (Result res : resultNull) {
                    log.info(res);
                }
            } else {
                log.info('null');
            }

            table.close();
        } catch (IOException e) {
            log.error(e);
        }

    }

列名过滤器

和上面几个过滤器类似,这里是根据列进行筛选,设置相应的条件后就可以进行相应的筛选了。

/**
     * 列名过滤器
     */
    public static void testColumFilter() {

        try {
            HTable table = new HTable(config, 'testtable');

            Filter filter = new QualifierFilter(CompareOp.EQUAL,
                    new BinaryComparator('name'.getBytes()));
            Scan scan = new Scan('row2'.getBytes(), filter);
            ResultScanner result = table.getScanner(scan);
            for (Result res : result) {
                log.info(res);
            }

            Get get = new Get('row2'.getBytes());
            get.setFilter(filter);
            Result resultGet = table.get(get);
            log.info(resultGet);

            table.close();
        } catch (IOException e) {
            log.info(e);
        }

    }

参考列过滤器

参考列过滤器根据列族和列限定符进行筛选,返回与参考列相同时间戳的行的所有键值对。

/**
     * 参考列过滤器
     */
    public static void testDependentColumnFilter() {

        try {
            HTable table = new HTable(config, 'testtable');

            Filter filter = new DependentColumnFilter('col1'.getBytes(),
                    'name'.getBytes(), false);
            Scan scan = new Scan();
            scan.setFilter(filter);
            ResultScanner resu = table.getScanner(scan);
            for (Result result : resu) {
                log.info(result);
            }

            Get get = new Get('row2'.getBytes());
            get.setFilter(filter);
            Result result = table.get(get);
            log.info(result);

            table.close();
        } catch (IOException e) {
            log.error(e);
        }

    }

单列过滤器

通过一列的值进行判断是不是需要进行过滤。

/**
     * 单列过滤器
     */
    public static void testSingleColumnValueFilter() {

        try {
            HTable table = new HTable(config, 'testtable');

            Filter filter = new SingleColumnValueFilter('col1'.getBytes(),
                    'name'.getBytes(), CompareOp.EQUAL, 'wy'.getBytes());
            Scan scan = new Scan();
            scan.setFilter(filter);
            ResultScanner result = table.getScanner(scan);
            for (Result res : result) {
                log.info(res);
            }

            Get get = new Get('row2'.getBytes());
            get.setFilter(filter);
            Result resultGet = table.get(get);
            log.info(resultGet);

            table.close();
        } catch (IOException e) {
            log.info(e);
        }
    }

前缀过滤器

根据前缀进行匹配行键的数据,本例中给出的是以row为前缀的行的数据。

/**
     * 前缀过滤器
     */
    public static void testPrefixFilter() {

        try {
            HTable table = new HTable(config, 'testtable');

            Filter filter = new PrefixFilter('row'.getBytes());
            Scan scan = new Scan();

            scan.setFilter(filter);
            ResultScanner result = table.getScanner(scan);
            for (Result res : result) {
                log.info('res>' + res);
            }

            Get get = new Get('row2'.getBytes());
            Result resultGet = table.get(get);
            log.info('get>' + resultGet);

            table.close();
        } catch (IOException e) {
            log.info(e);
        }
    }

分页过滤器

通过PageFilter设置每页返回的行数。注意:翻页时需要重新设置起始行,新的起始行必须与上一页最后一行的行键不同(例如在其末尾追加一个0字节),否则会重复读到同一行,造成死循环无法停止。

/**
     * 分页过滤器
     */
    public static void testPageFilter() {

        try {
            HTable table = new HTable(config, 'testtable');

            Filter filter = new PageFilter(10);
            int totalRows = 0;
            byte[] lastRow = null;
            Scan scan = new Scan();
            while (true) {

                scan.setFilter(filter);
                if (lastRow != null) {
                    // 加上0后表示新的开始防止row的内容一样造成死循环
                    byte[] startRow = Bytes.add(lastRow, POSTFIX);
                    scan.setStartRow(startRow);
                }

                ResultScanner resultScan = table.getScanner(scan);

                int localRows = 0;
                Result result = resultScan.next();
                while (result != null) {

                    log.info(result);
                    localRows++;
                    totalRows++;

                    lastRow = result.getRow();
                    result = resultScan.next();
                }
                if (localRows == 0)
                    break;
            }
            log.info(totalRows);

            table.close();
        } catch (IOException e) {
            log.info(e);
        }

    }
/**
     * 列分页过滤
     */
    public static void testColumnPaginationFilter() {

        try {
            HTable table = new HTable(config, 'testtable');

            Filter filter = new ColumnPaginationFilter(5, 10);
            Scan scan = new Scan();
            scan.setFilter(filter);

            ResultScanner result = table.getScanner(scan);
            for (Result res : result) {
                log.info(res);
            }

            table.close();
        } catch (IOException e) {
            log.info(e);
        }
    }

Skip过滤器

与ValueFilter结合使用:如果一行中有任意一列不符合要求,则整行数据都会被过滤掉。

/**
     * 跳过过滤器
     */
    public static void testSkipFilter() {

        try {
            HTable table = new HTable(config, 'testtable');

            Filter filt = new ValueFilter(CompareOp.NOT_EQUAL,
                    new BinaryComparator('v'.getBytes()));
            Scan scanValue = new Scan();
            scanValue.setFilter(filt);
            ResultScanner ress = table.getScanner(scanValue);

            for (Result result : ress) {
                log.info('<' + result);
            }

            Filter filter = new SkipFilter(filt);

            Scan scan = new Scan();
            scan.setFilter(filter);
            ResultScanner result = table.getScanner(scan);
            for (Result res : result) {
                log.info('>' + res);
            }

            table.close();
        } catch (IOException e) {
            log.info(e);
        }
    }

全匹配过滤器

在过滤条件第一次不成立之前的数据会全部查询出来;一旦遇到不满足过滤条件的数据,整个扫描立即结束。

    /**
     * 全匹配过滤器
     */
    public static void testWhileMatch() {

        try {
            HTable table = new HTable(config, 'testtable');

            Filter filt = new RowFilter(CompareOp.NOT_EQUAL,
                    new BinaryComparator('row6'.getBytes()));

            Scan scan = new Scan();
            scan.setFilter(filt);
            ResultScanner results = table.getScanner(scan);
            for (Result res : results) {
                log.info('>' + res);
            }

            Filter filter = new WhileMatchFilter(filt);
            scan.setFilter(filter);
            ResultScanner resultScan = table.getScanner(scan);
            for (Result res : resultScan) {
                log.info('<' + res);
            }

            table.close();
        } catch (IOException e) {
            log.info(e);
        }

    }

过滤器组合

可以将上面的多个过滤器放在一个List中,组合成一个过滤器列表(FilterList)来进行过滤。

    /**
     * 过滤器组合
     */
    public static void testFilterList() {

        List<Filter> filterList = new ArrayList<Filter>();
        Filter filter1 = new SingleColumnValueFilter('col1'.getBytes(),
                'name'.getBytes(), CompareOp.EQUAL, 'x'.getBytes());
        filterList.add(filter1);

        Filter filter2 = new RowFilter(CompareOp.NOT_EQUAL,
                new BinaryComparator('row2'.getBytes()));
        filterList.add(filter2);

        FilterList filters = new FilterList(filterList);
        Scan scan = new Scan();
        scan.setFilter(filters);

        try {
            HTable table = new HTable(config, 'testtable');

            ResultScanner result = table.getScanner(scan);

            for (Result res : result) {
                log.info(res);
            }

            table.close();
        } catch (IOException e) {
            log.info(e);
        }
    }

转载注明出处:http://blog.csdn.net/wangyang1354/article/details/53761559

Tag标签: 葱岭   过滤器  
  • 专题推荐

About IT165 - 广告服务 - 隐私声明 - 版权申明 - 免责条款 - 网站地图 - 网友投稿 - 联系方式
本站内容来自于互联网,仅供用于网络技术学习,学习中请遵循相关法律法规