Share your python scripts

Discussion in 'Other Scripting Languages' started by Sheraf, Feb 8, 2016.

  1. Sheraf

    Sheraf Registered Member

    Joined:
    Jan 19, 2014
    Messages:
    62
    Likes Received:
    8
    Here is a simple script I made: it checks the number of results for each query in a CSV file and writes that number into the next column of the query's row (in the CSV file).

    Code:
    #!/usr/bin/env python2
    # -*- coding: utf-8 -*-
    
    
    import unicodecsv
    import lxml.html
    import urllib
    import re
    import datetime
    import codecs
    import time
    import sys
    
    
    # Despite its name, this pattern matches every character that is NOT a
    # digit; substituting its matches with '' leaves only the digits of a string.
    DIGITS_ONLY = re.compile(r"[^0-9]")
    # Date of the current run formatted as MM/DD/YYYY (local time).
    TODAY = datetime.datetime.today().strftime("%m/%d/%Y")
    
    
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.proxy import *
    
    
    # Firefox profile that is passed to the webdriver when the browser is started.
    profile = webdriver.FirefoxProfile()
    # To connect through a proxy, uncomment the three lines below. They configure
    # a SOCKS proxy on 127.0.0.1:8080, which is the setup used with an SSH tunnel
    # (hence the connection to localhost).
    #profile.set_preference('network.proxy.type', 1)
    #profile.set_preference('network.proxy.socks', '127.0.0.1')
    #profile.set_preference('network.proxy.socks_port', 8080)
    
    
    # QUERIES[i] is the stripped first cell of the i-th kept row; RESULTS[i] is
    # that whole row (a list of cells). The two lists are index-aligned.
    QUERIES = []
    RESULTS = []


    # Load the CSV file named by the first command-line argument. The `with`
    # block closes the handle as soon as reading is done, so the same path can
    # safely be reopened for writing later in the script.
    with open(sys.argv[1], "rb") as f:
        # Files previously written by this script start with a UTF-8 BOM. Skip
        # any leading BOM(s) so they do not end up inside the first header cell
        # (which would add one more BOM to the file on every run).
        bom_length = 0
        while f.read(len(codecs.BOM_UTF8)) == codecs.BOM_UTF8:
            bom_length += len(codecs.BOM_UTF8)
        f.seek(bom_length)

        reader = unicodecsv.reader(f, encoding='utf-8')
        for query in reader:
            # A blank line is returned as an empty row; skip it instead of
            # raising IndexError on query[0].
            if not query:
                continue
            first_cell = query[0].strip()
            # Rows whose first column is empty carry no query and are dropped.
            if first_cell != "":
                QUERIES.append(first_cell)
                RESULTS.append(query)
    
    
    
    
    driver = webdriver.Firefox(profile)
    #driver.get("http://monip.org/")
    #sys.exit()
    
    
    for index, query in enumerate(QUERIES):
        if index == 0:
            RESULTS[index].append(TODAY)
            continue
    
    
        driver.get("https://www.google.com/search?q=%s" % urllib.quote(query))
        source = driver.page_source
        while "CaptchaRedirect" in source:
            print "Enter captcha..."
            source = driver.page_source
            time.sleep(1)
        data = lxml.html.fromstring(source)
        try:
            nb_results = data.cssselect("#resultStats")[0].text
            nb_results = DIGITS_ONLY.sub('', nb_results)
        except:
            nb_results = "0"
        print "%s => %s" % (query, nb_results)
        RESULTS[index].append(nb_results)
    
    
    driver.quit()
    
    
    
    
    f = codecs.open(sys.argv[1], "wb")
    f.write(codecs.BOM_UTF8)
    w = unicodecsv.writer(f, encoding='utf-8')
    for row in RESULTS:
        w.writerow(row)
    f.close()
    
    
    print "%s updated." % sys.argv[1]
    
    The filename of your CSV file has to be passed as the first command-line argument (sys.argv[1]).

    The first column is the query to run in Google (example: site:blackhatworld.com). Each time you run the script, it adds a new column with the date as its header and the number of results returned for each query. The first row is the header, so it won't be executed as a query.