1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Search Web Log for Referred GETs Who Have POSTed
#
# Notes: Used mainly to detect click fraud
#
# Author: Markus Diersbock <markus@swingnote.com>
#
# Copyright(c)2014 SwingNote LLC. All Rights Reserved.
#
# Terms: You may freely use this source, provided
# all copyright notices remain intact.


# main weblog fields
class Fields:
    """Indexes into a whitespace-split weblog line for the columns this script reads.

    NOTE(review): the positions look like the Apache/NCSA combined log
    format split on whitespace -- confirm against the actual log layout.
    """

    # client ip, request method, requested resource, referrer
    ip, method, resource, referrer = 0, 5, 6, 10

# path of the weblog that will be scanned
LOG_FILE = "your_weblog.txt"

# referring domain whose traffic is being investigated
TARGET_DOMAIN = "suspect_domain.com"

# input log (opened first, for reading) and output report named after the domain
log_file = open(LOG_FILE, "r")
report_file = open(TARGET_DOMAIN + ".txt", "w")

# running state: distinct referred ips plus the two hit counters
target_ips = []
get_hits = post_hits = 0

# report header: title line followed by a 50-character rule
report_file.writelines([
    "\nReferring Domain: " + TARGET_DOMAIN + "\n",
    ("=" * 50) + "\n",
])


def get_lite_request_details(request_details):
    """Build the one-line report entry for a parsed weblog request.

    request_details is the list of whitespace-split fields of one log line.
    The result is the ip left-justified to 17 columns, the method with its
    double quotes removed and left-justified to 6 columns, the resource,
    a tab, and the referrer.
    """
    columns = [
        request_details[Fields.ip].ljust(17),
        request_details[Fields.method].replace("\"", "").ljust(6),
        request_details[Fields.resource],
        "\t",
        request_details[Fields.referrer],
    ]
    return "".join(columns)

# parse weblog
#
# Each line is split on whitespace and classified as either a referred hit
# (referrer contains TARGET_DOMAIN and the resource is "/" or a .php page)
# or a POST from an ip that was previously seen as a referred hit. Matching
# lines are written to the report and counted.

# a line needs at least this many fields for every index used below to exist
min_fields = max(Fields.ip, Fields.method, Fields.resource, Fields.referrer) + 1

# set mirror of target_ips so membership tests do not rescan the whole list
# for every log line; target_ips itself is still appended to, in first-seen order
seen_ips = set(target_ips)

for request in log_file:
    request_details = request.split()

    # blank or truncated lines would otherwise raise IndexError and abort the run
    if len(request_details) < min_fields:
        continue

    ip = request_details[Fields.ip]
    resource = request_details[Fields.resource]

    # if referrer in log file, append ip to list
    # NOTE(review): the request method is not checked in this branch, so a
    # referred POST to "/" or a .php page is counted as a GET hit -- confirm intended
    if (TARGET_DOMAIN in request_details[Fields.referrer]) and (resource == "/" or ".php" in resource):
        # add ip without dupes
        if ip not in seen_ips:
            seen_ips.add(ip)
            target_ips.append(ip)

        report_file.write(get_lite_request_details(request_details) + "\n")
        get_hits += 1
    # if current post ip is in ip list
    elif ("POST" in request_details[Fields.method]) and (ip in seen_ips):
        report_file.write(get_lite_request_details(request_details) + "\n")
        post_hits += 1

# report footer: closing rule, then the totals line (also echoed to stdout in short form)
footer_rule = ("=" * 50) + "\n"
footer_totals = "Hits for '{0}' = {1} GETs / {2} POSTs".format(TARGET_DOMAIN, get_hits, post_hits) + "\n"
report_file.write(footer_rule)
report_file.write(footer_totals)
print("Hits for '{0}' = {1}/{2}".format(TARGET_DOMAIN, get_hits, post_hits))

# cleanup: release the report first, then the log, as before
for handle in (report_file, log_file):
    handle.close()