Hive 练习题解答

题目

在 Github 上面找了一些题目来练习
https://github.com/zzyb/Hive_interview-question

解答

answer
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
 -- 第一道面试题
-- 编写Hive的HQL语句求出连续三天登陆的用户id
select distinct user_id
from (
select user_id,
lag(login_date,1) over (partition by user_id order by login_date) prev,
login_date now,
lead(login_date,1) over (partition by user_id order by login_date) next
from login_log) t where datediff(now,prev) = 1 and datediff(next,now)=1;

-- 第一道面试题
-- 编写Hive的HQL语句求出每 个店铺的当月销售额和累计到当月的总销售额

select name,mon,monMoney,accMonMoney
from(
select name,mon,sum(monmoney) over (partition by name,mon) monMoney,
sum(monmoney) over (partition by name order by mon) accMonMoney,
row_number() over (partition by name,mon order by mon) top
from fangwen2)t where top=1;

-- 第二道面试题
-- 编写Hive的HQL语句求出每个用户截止到每月为止的最大单月访问次数和累计到该月的总访问次数
select usr,mon,num,col1 monPv,col2 maxMonPv,col3 accMonPv from(
select usr,mon,num,sum(num) over (partition by usr,mon) col1,
max(num) over (partition by usr,mon) col2,
sum(num) over (partition by usr order by mon) col3,
row_number() over (partition by usr,mon order by mon) col4 from fangwen) t where col4=1;

select usr,mon,maxPv,totalPv from(
select usr,mon,num,
max(num) over (partition by usr,mon) maxPV,
sum(num) over (partition by usr order by mon) totalPv,
row_number() over (partition by usr,mon order by mon) top
from fangwen) t where top=1;


-- 第三道面试题
-- 编写Hive的HQL语句按照day和mac分组,求出每组的销量累计总和,追加到每条记录的后面
select day,mac,color,num,
sum(num) over (partition by day,mac)
from mac;


-- TopN
-- 第六道面试题

-- 1、求出每一年的最高温度(年份,最高温度)
select t.c_year,max(t.c_temp) from (
select year(from_unixtime(unix_timestamp(substr(line,0,8),'yyyyMMdd'))) c_year,substr(line,9,2) c_temp from wendu
) t group by t.c_year;

select dt,year(dt),temp
from(
select concat_ws('-',substring(line,0,4),substring(line,5,2),substring(line,7,2)) dt,
substring(line,9,2) temp,
row_number() over (partition by substring(line,0,4) order by substring(line,9,2) DESC) top
from wendu) subT where top=1;

-- 2、求出每一年的最高温度是那一天(日期, 最高温度)

select dt,temp
from(
select concat_ws('-',substring(line,0,4),substring(line,5,2),substring(line,7,2)) dt,
substring(line,9,2) temp,
row_number() over (partition by substring(line,0,4) order by substring(line,9,2) DESC) top
from wendu) subT where top=1;

select t.c_year,t.c_date,t.c_max from
(select
year(from_unixtime(unix_timestamp(substr(line,0,8),'yyyyMMdd'))) c_year,
from_unixtime(unix_timestamp(substr(line,0,8),'yyyyMMdd')) c_date,
row_number() over(partition by year(from_unixtime(unix_timestamp(substr(line,0,8),'yyyyMMdd'))) order by substr(line,9,2) DESC) as c_rank,
substr(line,9,2) c_max
from wendu) t where t.c_rank = 1;

-- 第四道面试题
-- 求出所有数学课程成绩 大于 语文课程成绩的学生的学号
-- 方法1 case when
select * from(
select sid,
max(case course when 'yuwen' then course end) yuwen,
max(case course when 'yuwen' then score end) yuwenScore,
max(case course when 'shuxue' then course end) shuxue,
max(case course when 'shuxue' then score end) shuxueScore,
max(case course when 'yingyu' then course end) yingyu,
max(case course when 'yingyu' then score end) yingyuScore
from chengji group by sid) t where t.shuxueScore > t.yuwenScore;

-- 方法2 join
select sid,t1score yuwen,t2score shuxue from(
select * from (
select sid,course,score t1score
from chengji
where course = 'yuwen'
) t1
join (
select sid,course,score t2score from chengji where course = 'shuxue'
) t2 on t1.sid = t2.sid) t3 where t3.t2score > t3.t1score;

-- 第五道面试题
-- id_course.id id_course.course
-- 1 a
-- 1 b
-- 1 c
-- 1 e
-- 2 a
-- 2 c
-- 2 d
-- 2 f
-- 3 a
-- 3 b
-- 3 c
-- 3 e
-- 转换为
-- id a b b c d e f
-- 1 1 1 1 1 0 1 0
-- 2 1 0 0 1 1 0 1
-- 3 1 1 1 1 0 1 0

select id,
max(case course when 'a' then 1 ELSE 0 end) a,
max(case course when 'b' then 1 ELSE 0 end) b,
max(case course when 'b' then 1 ELSE 0 end) b,
max(case course when 'c' then 1 ELSE 0 end) c,
max(case course when 'd' then 1 ELSE 0 end) d,
max(case course when 'e' then 1 ELSE 0 end) e,
max(case course when 'f' then 1 ELSE 0 end) f
from id_course group by id;

-- 第七道面试题
-- 1,huangbo,45,a-c-d-f
-- 2,xuzheng,36,b-c-d-e
-- 3,huanglei,41,c-d-e
-- 4,liushishi,22,a-d-e
-- 5,liudehua,39,e-f-d
-- 6,liuyifei,35,a-d-e
-- 求出每种爱好中,年龄最大的人
select id,name,maxAge,favor
from (
select id, name, max(age) over (partition by favor) maxAge, favor,
row_number() over (partition by favor order by age DESC) top
from aihao lateral VIEW explode(split(favors, '-')) t1 as favor
)t2 where top=1;
-- 列出每个爱好年龄最大的两个人,并且列出名字。
select id,name,maxAge,favor
from (
select id, name, max(age) over (partition by favor) maxAge, favor,
row_number() over (partition by favor order by age DESC) top
from aihao lateral VIEW explode(split(favors, '-')) t1 as favor
)t2 where top<=2;

-- 第十道面试题
-- 日期,产品id,产品当日收入,产品当日成本
-- 2018-03-01,a,3000,2500
-- 2018-03-01,b,4000,3200
-- 2018-03-01,c,3200,2400
-- 2018-03-01,d,3000,2500
-- 2018-03-02,a,3000,2500
-- 2018-03-02,b,1500,800
-- 2018-03-02,c,2600,1800
-- 2018-03-02,d,2400,1000
-- 2018-03-03,a,3100,2400
-- 2018-03-03,b,2500,2100
-- 2018-03-03,c,4000,1200
-- 2018-03-03,d,2500,1900
-- 2018-03-04,a,2800,2400
-- 2018-03-04,b,3200,2700
-- 2018-03-04,c,2900,2200
-- 2018-03-04,d,2700,2500
-- 2018-03-05,a,2700,1000
-- 2018-03-05,b,1800,200
-- 2018-03-05,c,5600,2200
-- 2018-03-05,d,1200,1000
-- 2018-03-06,a,2900,2500
-- 2018-03-06,b,4500,2500
-- 2018-03-06,c,6700,2300
-- 2018-03-06,d,7500,5000
-- 2018-04-01,a,3000,2500
-- 2018-04-01,b,4000,3200
-- 2018-04-01,c,3200,2400
-- 2018-04-01,d,3000,2500
-- 2018-04-02,a,3000,2500
-- 2018-04-02,b,1500,800
-- 2018-04-02,c,4600,1800
-- 2018-04-02,d,2400,1000
-- 2018-04-03,a,6100,2400
-- 2018-04-03,b,4500,2100
-- 2018-04-03,c,6000,1200
-- 2018-04-03,d,3500,1900
-- 2018-04-04,a,2800,2400
-- 2018-04-04,b,3200,2700
-- 2018-04-04,c,2900,2200
-- 2018-04-04,d,2700,2500
-- 2018-04-05,a,4700,1000
-- 2018-04-05,b,3800,200
-- 2018-04-05,c,5600,2200
-- 2018-04-05,d,5200,1000
-- 2018-04-06,a,2900,2500
-- 2018-04-06,b,4500,2500
-- 2018-04-06,c,6700,2300
-- 2018-04-06,d,7500,5000

-- 源数据3月份与4月份日利润相同,添加一条数据使其不同。
insert into goods values ('2018-04-07','d','5000','3000');

-- 1. 输出每个产品,在2018年期间,每个月的净利润,日均成本。
select d_month,profit,avgCost from (
select concat(year(dt),'-',month(dt)) d_month,name,income,cost,
(sum(income) over(partition by concat(year(dt),'-',month(dt))) -
sum(cost) over(partition by concat(year(dt),'-',month(dt)))) profit,
avg(cost) over(partition by concat(year(dt),'-',month(dt))) avgCost,
row_number() over (partition by concat(year(dt),'-',month(dt)) order by name) top
from goods where year(dt) = '2018') t_month where t_month.top=1;

-- 2. 输出每个产品,在2018年3月中每一天与上一天相比,成本的变化。
select
dt,cost,
(cost - LAG(cost,1,cost) over(partition by concat(year(dt),'-',month(dt)) order by dt))
from goods where concat(year(dt),'-',month(dt)) = '2018-3';

-- 3. 输出2018年4月,有多少个产品总收入大于22000元,必须用一句SQL语句实现,且不允许使用关联表查询、子查询。
select concat(year(dt),'-',month(dt)),name,sum(income) sumIncome
from goods where concat(year(dt),'-',month(dt)) = '2018-4'
group by name,concat(year(dt),'-',month(dt)) having sumIncome>22000;

-- 4. 输出2018年4月,总收入最高的那个产品,每日的收入,成本,过程使用over()函数。
select g.dt,maxT.name,maxT.maxIncome,g.income,g.cost from(
select name,totalIncome,max(totalIncome) over() maxIncome from(
select
dt,name,income,cost,
sum(income) over (partition by name) totalIncome
from goods where concat(year(dt),'-',month(dt)) = '2018-4') t limit 1
) maxT join goods g on maxT.name=g.name where concat(year(dt),'-',month(dt)) = '2018-4';



作者

MisakaWater

发布于

2022-01-23

更新于

2022-01-23

许可协议