尚硅谷大数据技术之Hive(新)第10章 Hive实战之谷粒影音
10.3 准备工作
10.3.1 创建表
创建原始数据表:gulivideo_ori,gulivideo_user_ori
创建ORC表:gulivideo_orc,gulivideo_user_orc
gulivideo_ori:
-- Raw video table stored as plain text.
-- Fields are tab-separated; the items inside the two array columns
-- (category, relatedId) are separated by "&".
create table gulivideo_ori (
    videoId   string,
    uploader  string,
    age       int,
    category  array<string>,
    length    int,
    views     int,
    rate      float,
    ratings   int,
    comments  int,
    relatedId array<string>
)
row format delimited
    fields terminated by "\t"
    collection items terminated by "&"
stored as textfile;
gulivideo_user_ori:
-- Raw user table stored as plain text; fields are tab-separated.
create table gulivideo_user_ori (
    uploader string,
    videos   int,
    friends  int
)
row format delimited
    fields terminated by "\t"
stored as textfile;
接下来创建ORC表,稍后(见10.3.3)再把原始表中的数据插入到ORC表中
gulivideo_orc:
-- ORC copy of the video table, with the same columns as gulivideo_ori.
-- Rows are bucketed by uploader into 8 buckets.
-- NOTE(review): the delimiter clauses are kept from the original statement;
-- with "stored as orc" they presumably have no effect on how the data is
-- written -- confirm before removing them.
create table gulivideo_orc (
    videoId   string,
    uploader  string,
    age       int,
    category  array<string>,
    length    int,
    views     int,
    rate      float,
    ratings   int,
    comments  int,
    relatedId array<string>
)
clustered by (uploader) into 8 buckets
row format delimited
    fields terminated by "\t"
    collection items terminated by "&"
stored as orc;
gulivideo_user_orc:
-- ORC copy of the user table, with the same columns as gulivideo_user_ori.
-- NOTE(review): the delimiter clause is kept from the original statement;
-- with "stored as orc" it presumably has no effect on how the data is
-- written -- confirm before removing it.
create table gulivideo_user_orc (
    uploader string,
    videos   int,
    friends  int
)
row format delimited
    fields terminated by "\t"
stored as orc;
10.3.2 导入ETL后的数据
gulivideo_ori:
-- Load the ETL output for videos into the raw text table.
-- There is no "local" keyword, so the path is resolved on the cluster
-- filesystem rather than on the client machine; per the Hive DML manual the
-- source files are moved (not copied) into the table's location.
load data inpath "/gulivideo/output/video/2008/0222"
into table gulivideo_ori;
gulivideo_user_ori:
-- Load the user data into the raw text table.
-- There is no "local" keyword, so the path is resolved on the cluster
-- filesystem rather than on the client machine; per the Hive DML manual the
-- source files are moved (not copied) into the table's location.
load data inpath "/gulivideo/user/2008/0903"
into table gulivideo_user_ori;
10.3.3 向ORC表插入数据
gulivideo_orc:
-- Copy every row of the raw text table into the ORC table.
-- Columns are listed explicitly, in the order gulivideo_orc declares them,
-- so the insert does not silently depend on the two tables keeping the same
-- column order.
-- NOTE(review): gulivideo_orc is declared with 8 buckets on uploader; older
-- Hive releases presumably need hive.enforce.bucketing=true for the insert to
-- honor that -- confirm for the target Hive version.
insert into table gulivideo_orc
select
    videoId,
    uploader,
    age,
    category,
    length,
    views,
    rate,
    ratings,
    comments,
    relatedId
from gulivideo_ori;
gulivideo_user_orc:
-- Copy every row of the raw user table into the ORC user table.
-- Columns are listed explicitly, in the order gulivideo_user_orc declares
-- them, so the insert does not silently depend on column order.
insert into table gulivideo_user_orc
select
    uploader,
    videos,
    friends
from gulivideo_user_ori;