尚硅谷大数据技术之Hive(新)第10章 Hive实战之谷粒影音

10.3 准备工作

10.3.1 创建表

创建表:gulivideo_ori,gulivideo_user_ori,

创建表:gulivideo_orc,gulivideo_user_orc

gulivideo_ori:

create table gulivideo_ori(

    videoId string,

    uploader string,

    age int,

    category array<string>,

    length int,

    views int,

    rate float,

    ratings int,

    comments int,

    relatedId array<string>)

row format delimited

fields terminated by "\t"

collection items terminated by "&"

stored as textfile;

gulivideo_user_ori:

create table gulivideo_user_ori(

    uploader string,

    videos int,

    friends int)

row format delimited

fields terminated by "\t"

stored as textfile;

然后把原始数据插入到orc表中

gulivideo_orc:

create table gulivideo_orc(

    videoId string,

    uploader string,

    age int,

    category array<string>,

    length int,

    views int,

    rate float,

    ratings int,

    comments int,

    relatedId array<string>)

clustered by (uploader) into 8 buckets

row format delimited fields terminated by "\t"

collection items terminated by "&"

stored as orc;

gulivideo_user_orc:

create table gulivideo_user_orc(

    uploader string,

    videos int,

    friends int)

row format delimited

fields terminated by "\t"

stored as orc;

10.3.2 导入ETL后的数据

gulivideo_ori:

load data inpath "/gulivideo/output/video/2008/0222" into table gulivideo_ori;

gulivideo_user_ori:

load data inpath "/gulivideo/user/2008/0903" into table gulivideo_user_ori;

10.3.3 向ORC表插入数据

gulivideo_orc:

insert into table gulivideo_orc select * from gulivideo_ori;

gulivideo_user_orc:

insert into table gulivideo_user_orc select * from gulivideo_user_ori;