线上开启了hive的并发支持,因此job之间会有lock的竞争问题。
发生锁竞争时,Hive 会输出如下信息:
conflicting lock present for table mode EXCLUSIVE
在一些场景下,有些 job 运行完毕后不会自动释放锁(需要手动 unlock,或者到 ZooKeeper 中删除对应的锁节点),因此需要对锁进行监控,监控数据主要来自 show locks 的输出。
具体的python脚本如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
import os
import re
import subprocess
import sys
import time

import property
import sendmail
import util
# Maximum tolerated lock age in seconds, keyed by the lock type string taken
# from the last field of a "show locks" line.  Lock types that are not listed
# here never trigger an alert.
LOCK_AGE_LIMITS = {"SHARED": 1800, "EXCLUSIVE": 600}

# The HTML report is written to REPORT_PATH_PREFIX + str(now) + ".html".
REPORT_PATH_PREFIX = "/home/hdfs/ericni/lock_monitor/mail/lock_table_"

REPORT_HEADER = (
    "\n<table border=1>\n"
    "<tr align=\"right\" valign=\"middle\">\n"
    "<th>TABLE</th><th>LOCK_TYPE</th><th>LOCK_TIME</th>"
    "<th>QUERY_ID</th><th>SQL</th>\n"
    "</tr>\n"
)

# One table row: table, lock type, lock age (highlighted), query id, SQL.
ROW_TEMPLATE = (
    "\n<tr align=\"right\" valign=\"middle\">\n<td>%s</td>\n"
    "\n<td>%s</td>\n"
    "\n<td bgcolor='#F80000'>%s</td>\n"
    "\n<td>%s</td>\n"
    "\n<td>%s</td>\n"
    "</tr>"
)


def parse_lock_line(line):
    """Split one line of "show locks" output into its components.

    The first whitespace-separated field is expected to look like
    ``database@table`` or ``database@table@partition``; the last field is
    taken as the lock type.

    Returns a ``(database, table, partition, lock_type)`` tuple, where
    ``partition`` is ``None`` for a table-level lock and otherwise has its
    ``/`` separators replaced by ``,``.  Returns ``None`` for lines that do
    not describe a lock (blank lines, or a first field without ``@``), so
    that the caller can skip them instead of failing with IndexError.
    """
    # Split on any whitespace once; this also tolerates trailing whitespace,
    # which would otherwise produce an empty lock type.
    fields = line.split()
    if not fields:
        return None
    target = fields[0].split('@')
    if len(target) < 2:
        return None
    partition = None
    if len(target) > 2:
        partition = target[2].replace('/', ',')
    return target[0], target[1], partition, fields[-1]


def is_stale_lock(lock_type, age_seconds):
    """Return True when a lock of ``lock_type`` has reached its age limit."""
    limit = LOCK_AGE_LIMITS.get(str(lock_type))
    return limit is not None and age_seconds >= limit


def escape_html(text):
    """Escape ``&``, ``<`` and ``>`` so ``text`` is safe inside a table cell."""
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def build_report(all_info, now):
    """Render the HTML table of locks that have been held for too long.

    ``all_info`` is the list filled in by ``util.get_lock_info``.  Entries
    are read positionally as (table, lock type, query id, lock creation
    time, SQL); entries with fewer than five items are ignored.
    NOTE(review): the creation time is assumed to be an epoch timestamp in
    seconds, comparable with ``time.time()`` -- confirm against ``util``.

    Returns ``(html, found_stale)`` where ``found_stale`` tells whether at
    least one row was emitted, i.e. whether an alert mail is warranted.
    """
    pieces = [REPORT_HEADER]
    found_stale = False
    for entry in all_info:
        if len(entry) < 5:
            continue
        lock_table, lock_type, query_id, created, lock_sql = entry[:5]
        age = float(now) - float(created)
        print(age)
        if not is_stale_lock(lock_type, age):
            continue
        print("++++++++++++++++++++++++++++++++++++++++++++++")
        found_stale = True
        # Format first, then escape: SQL text routinely contains "<" / ">"
        # comparisons that would otherwise break the mail's markup.
        pieces.append(ROW_TEMPLATE % (
            escape_html("%s" % (lock_table,)),
            escape_html("%s" % (lock_type,)),
            round(float(age), 2),
            escape_html("%s" % (query_id,)),
            escape_html("%s" % (lock_sql,)),
        ))
    pieces.append("</table>")
    return "".join(pieces), found_stale


def main():
    """Collect the current Hive locks and mail an alert for stale ones.

    Runs "show locks" through ``util.hive_run_cmd``, asks
    ``util.get_lock_info`` for the details of every lock, writes the HTML
    report to disk and sends it by mail only when at least one lock has
    exceeded its age limit.  The decision is kept in a local flag rather
    than in a /tmp/alert.file marker created and removed through shell
    commands, so a leftover marker can no longer cause a spurious mail.
    """
    all_info = []
    now = time.time()

    for line in util.hive_run_cmd("show locks"):
        parsed = parse_lock_line(line)
        if parsed is None:
            # Not a lock description; nothing to look up.
            continue
        database, table, partition, lock_type = parsed
        if partition is None:
            print(database + "===" + table + "===" + lock_type)
            util.get_lock_info(all_info, database=database, table=table,
                               keytype=lock_type)
        else:
            print(database + "===" + table + "===" + lock_type
                  + "====" + partition)
            util.get_lock_info(all_info, database=database, table=table,
                               keytype=lock_type, partition=partition)

    print(all_info)
    if not all_info:
        return

    mailcontent, found_stale = build_report(all_info, now)

    # The report is written even when no alert is sent; the context manager
    # guarantees the file is flushed and closed.
    with open(REPORT_PATH_PREFIX + str(now) + ".html", "w+") as mailfile:
        mailfile.write(mailcontent)

    if not found_stale:
        print("no need to alert")
        return

    print("+++______++++")
    sendmail.send_mail_withoutSSL("HIVE table lock alert",
                                  mailcontent.encode('utf-8'),
                                  property.mail_list_hdfs)


if __name__ == "__main__":
    main()
|
产生的报警邮件如下:
本文转自菜菜光 51CTO博客,原文链接:http://blog.51cto.com/caiguangguang/1344439,如需转载请自行联系原作者