尚硅谷大数据技术之电信客服

3.2.4 数据查询方式二

思路:

  a) 已知要查询的手机号码以及起始时间节点和结束时间节点,查询该时间范围内该手机号码的通话记录。
  b) 拼装startRowKey和stopRowKey,即扫描范围。要想拼接出扫描范围,首先需要了解rowkey的组成结构,我们再来复习一下,举个大栗子:

rowkey:

分区号_手机号码1_通话建立时间_手机号码2_主(被)叫标记_通话持续时间

01_15837312345_20170527081033_13766889900_1_0180

  c) 比如按月查询通话记录,则startRowKey举例:

regionHash_15837312345_20170501000000

stopRowKey举例:

regionHash_15837312345_20170601000000

注意:startRowKey和stopRowKey只保留rowkey的前缀(分区号_手机号码1_通话建立时间),后面的部分已经被去掉。

尖叫提示:rowKey的扫描范围为前闭后开。

尖叫提示:rowKey默认是有序的,排序规则为字符的按位比较

  d) 如果要查询整个时间段内的所有记录,需要多次scan表,每次scan将扫描范围设置为下一个时间窗口即可,该操作可放置于循环中。

编码:

1) 新建工具类:ScanRowkeyUtil

该类主要用于根据传入的手机号码以及查询的起止时间,生成若干组startRowKey和stopRowKey

package com.atguigu.utils;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.ArrayList;

import java.util.Calendar;

import java.util.Date;

import java.util.List;

/**

 * 该类主要用于根据用户传入的手机号以及开始和结束时间点,按月生成多组rowkey

 */

public class ScanRowkeyUtil {

    private String telephone;

    private String startDateString;

    private String stopDateString;

    List<String[]> list = null;

    int index = 0;

    private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

    private SimpleDateFormat sdf2 = new SimpleDateFormat("yyyyMMddHHmmss");

    public ScanRowkeyUtil(String telephone, String startDateString, String stopDateString) {

        this.telephone = telephone;

        this.startDateString = startDateString;

        this.stopDateString = stopDateString;

        list = new ArrayList<>();

        genRowKeys();

    }

    //01_15837312345_201711

    //15837312345 2017-01-01 2017-05-01

    public void genRowKeys(){

        int regions = Integer.valueOf(PropertyUtil.getProperty("hbase.regions.count"));

        try {

            Date startDate = sdf.parse(startDateString);

            Date stopDate = sdf.parse(stopDateString);

            //当前开始时间

            Calendar currentStartCalendar = Calendar.getInstance();

            currentStartCalendar.setTimeInMillis(startDate.getTime());

            //当前结束时间

            Calendar currentStopCalendar = Calendar.getInstance();

            currentStopCalendar.setTimeInMillis(startDate.getTime());

            currentStopCalendar.add(Calendar.MONTH, 1);

            while (currentStopCalendar.getTimeInMillis() <= stopDate.getTime()) {

                String regionCode = HBaseUtil.genPartitionCode(telephone, sdf2.format(new Date(currentStartCalendar.getTimeInMillis())), regions);

                // 01_15837312345_201711

                String startRowKey = regionCode + "_" + telephone + "_" + sdf2.format(new Date(currentStartCalendar.getTimeInMillis()));

                String stopRowKey = regionCode + "_" + telephone + "_" + sdf2.format(new Date(currentStopCalendar.getTimeInMillis()));

                String[] rowkeys = {startRowKey, stopRowKey};

                list.add(rowkeys);

                currentStartCalendar.add(Calendar.MONTH, 1);

                currentStopCalendar.add(Calendar.MONTH, 1);

            }

        } catch (ParseException e) {

            e.printStackTrace();

        }

    }

    /**

     * 判断list集合中是否还有下一组rowkey

     * @return

     */

    public boolean hasNext() {

        if(index < list.size()){

            return true;

        }else{

            return false;

        }

    }

    /**

     *  取出list集合中存放的下一组rowkey

     * @return

     */

    public String[] next() {

        String[] rowkeys = list.get(index);

        index++;

        return rowkeys;

    }

}

2) 新建单元测试类:HBaseScanTest2

package com.atguigu;

import com.atguigu.utils.DateTimeUtil;
import com.atguigu.utils.ScanRowkeyUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;
import java.text.ParseException;

public class HBaseScanTest2 {

    private static Configuration conf = null;

    static {

        conf = HBaseConfiguration.create();

    }

    @Test

    public void scanTest() throws IOException, ParseException {

        String call = "14473548449";

        String startPoint = "2017-01-01";

        String stopPoint = "2017-09-01";

        HTable hTable = new HTable(conf, "ns_telecom:calllog");

        Scan scan = new Scan();

        ScanRowkeyUtil scanRowkeyUtil = new ScanRowkeyUtil (call, startPoint, stopPoint);

        while (scanRowkeyUtil.hasNext()) {

            String[] rowKeys = scanRowkeyUtil.next();

            scan.setStartRow(Bytes.toBytes(rowKeys[0]));

            scan.setStopRow(Bytes.toBytes(rowKeys[1]));

            System.out.println("时间范围" + rowKeys[0].substring(15, 21) + "---" + rowKeys[1].substring(15, 21));

            ResultScanner resultScanner = hTable.getScanner(scan);

            //每一个rowkey对应一个result

            for (Result result : resultScanner) {

                //每一个rowkey里面包含多个cell

                Cell[] cells = result.rawCells();

                StringBuilder sb = new StringBuilder();

                sb.append(Bytes.toString(result.getRow())).append(",");

                for (Cell c : cells) {

                    sb.append(Bytes.toString(CellUtil.cloneValue(c))).append(",");

                }

                System.out.println(sb.toString());

            }

        }

    }

}

3) 运行测试

观察是否已经按照时间范围查询出对应的数据。