1.创建好waf-cdn 并且设置好规则和log存储方式为s3
2. Amazon Athena 服务 使用 (注意s3桶位置相同得区域)
https://docs.aws.amazon.com/zh_cn/athena/latest/ug/waf-logs.html#waf-example-count-matched-ip-addresses
官方文档参考,建一个分区查询表
不能直接使用 因为是cdn 资源需要修改相关字段
CREATE EXTERNAL TABLE `waf_logs`(
`timestamp` bigint,
`formatversion` int,
`webaclid` string,
`terminatingruleid` string,
`terminatingruletype` string,
`action` string,
`terminatingrulematchdetails` array <
struct <
conditiontype: string,
sensitivitylevel: string,
location: string,
matcheddata: array < string >
>
>,
`httpsourcename` string,
`httpsourceid` string,
`rulegrouplist` array <
struct <
rulegroupid: string,
terminatingrule: struct <
ruleid: string,
action: string,
rulematchdetails: array <
struct <
conditiontype: string,
sensitivitylevel: string,
location: string,
matcheddata: array < string >
>
>
>,
nonterminatingmatchingrules: array <
struct <
ruleid: string,
action: string,
overriddenaction: string,
rulematchdetails: array <
struct <
conditiontype: string,
sensitivitylevel: string,
location: string,
matcheddata: array < string >
>
>,
challengeresponse: struct <
responsecode: string,
solvetimestamp: string
>,
captcharesponse: struct <
responsecode: string,
solvetimestamp: string
>
>
>,
excludedrules: string
>
>,
`ratebasedrulelist` array <
struct <
ratebasedruleid: string,
limitkey: string,
maxrateallowed: int
>
>,
`nonterminatingmatchingrules` array <
struct <
ruleid: string,
action: string,
rulematchdetails: array <
struct <
conditiontype: string,
sensitivitylevel: string,
location: string,
matcheddata: array < string >
>
>,
challengeresponse: struct <
responsecode: string,
solvetimestamp: string
>,
captcharesponse: struct <
responsecode: string,
solvetimestamp: string
>
>
>,
`requestheadersinserted` array <
struct <
name: string,
value: string
>
>,
`responsecodesent` string,
`httprequest` struct <
clientip: string,
country: string,
headers: array <
struct <
name: string,
value: string
>
>,
uri: string,
args: string,
httpversion: string,
httpmethod: string,
requestid: string
>,
`labels` array <
struct <
name: string
>
>,
`captcharesponse` struct <
responsecode: string,
solvetimestamp: string,
failureReason: string
>,
`challengeresponse` struct <
responsecode: string,
solvetimestamp: string,
failureReason: string
>,
`ja3Fingerprint` string,
`oversizefields` string,
`requestbodysize` int,
`requestbodysizeinspectedbywaf` int
)
PARTITIONED BY (
`region` string,
`date` string)
ROW FORMAT SERDE
'org.openx.data.jsonserde.JsonSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
's3://<aws-waf-logs-xxx>/AWSLogs/<accountID>/WAFLogs/cloudfront/<waf-acl>'
TBLPROPERTIES(
'projection.enabled' = 'true',
'projection.region.type' = 'enum',
'projection.region.values' = 'cloudfront',
'projection.date.type' = 'date',
'projection.date.range' = '2024/07/08,NOW',
'projection.date.format' = 'yyyy/MM/dd',
'projection.date.interval' = '1',
'projection.date.interval.unit' = 'DAYS',
'storage.location.template' = 's3://<aws-waf-logs-xxx>/AWSLogs/<accountID>/WAFLogs/${region}/<waf-acl>/${date}/')
修改< >中的字符为自己的资源
测试查询
SELECT
COUNT(httpRequest.country) as count,
httpRequest.country
FROM waf_logs
WHERE
terminatingruletype='RATE_BASED'
GROUP BY httpRequest.country
ORDER BY count
LIMIT 100;
SELECT
COUNT(*) AS count,
webaclid,
action,
httprequest.clientip,
httprequest.uri
FROM waf_logs
WHERE terminatingruleid='<id>'
GROUP BY webaclid, action, httprequest.clientip, httprequest.uri
ORDER BY count DESC
LIMIT 100;
具体的sql 字段需要修改成自己的 可以先检索全表 查看字段 方便搜索