1. This site uses cookies. By continuing to use this site, you are agreeing to our use of cookies. Learn More.

Share your python scripts

Discussion in 'Other Scripting Languages' started by Sheraf, Feb 8, 2016.

  1. Sheraf

    Sheraf Registered Member

    Joined:
    Jan 19, 2014
    Messages:
    61
    Likes Received:
    8
    Here is a simple script I made: it checks the number of Google results for each query in a CSV file and writes that number in the next column of the query's row (in the same CSV file).

    Code:
    #!/usr/bin/env python2
    # -*- coding: utf-8 -*-
    
    
    import unicodecsv
    import lxml.html
    import urllib
    import re
    import datetime
    import codecs
    import time
    import sys
    
    
    # Date stamp (MM/DD/YYYY) appended to the header row as the title of the
    # column added by this run.
    TODAY = datetime.date.today().strftime("%m/%d/%Y")
    # NOTE: despite its name, this pattern matches every character that is NOT
    # a digit; substituting its matches with '' leaves only the digits.
    DIGITS_ONLY = re.compile(r"[^0-9]")
    
    
    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys
    from selenium.webdriver.common.proxy import *
    
    
    # Firefox profile handed to webdriver.Firefox() when the browser is started.
    profile = webdriver.FirefoxProfile()
    # Optional: route the browser through a proxy. The example below is a SOCKS
    # proxy exposed by an SSH tunnel, which is why it points at localhost.
    # Uncomment the three lines to enable it.
    #profile.set_preference('network.proxy.type', 1)
    #profile.set_preference('network.proxy.socks', '127.0.0.1')
    #profile.set_preference('network.proxy.socks_port', 8080)
    
    
    # Parallel lists built from the input CSV: QUERIES[i] is the stripped first
    # cell of a row and RESULTS[i] is that complete row. Each RESULTS row is
    # later extended with one extra column and written back to the same file.
    # Rows whose first cell is blank are not kept.
    QUERIES = []
    RESULTS = []


    # "with" guarantees the input file is closed even if parsing fails.
    with open(sys.argv[1], "rb") as f:
        # This script writes its output with a leading UTF-8 BOM. Skip it on
        # input, otherwise it ends up glued to the first header cell and one
        # more BOM is added to the file on every run.
        if f.read(len(codecs.BOM_UTF8)) != codecs.BOM_UTF8:
            f.seek(0)
        reader = unicodecsv.reader(f, encoding='utf-8')
        for row in reader:
            # A completely blank line is parsed as an empty row; indexing
            # row[0] on it would raise IndexError.
            if not row:
                continue
            first_cell = row[0].strip()
            if first_cell != "":
                QUERIES.append(first_cell)
                RESULTS.append(row)
    
    
    
    
    # Start Firefox with the profile configured above.
    driver = webdriver.Firefox(profile)
    # Debugging aid: uncomment the two lines below to load an IP-check page
    # and exit, e.g. to verify that the proxy settings are applied.
    #driver.get("http://monip.org/")
    #sys.exit()


    # try/finally makes sure the browser is closed even if a query fails.
    try:
        for index, query in enumerate(QUERIES):
            if index == 0:
                # The first row is the CSV header: label the new column with
                # today's date instead of running it as a query.
                RESULTS[index].append(TODAY)
                continue

            # Python 2's urllib.quote() raises KeyError on unicode strings
            # containing non-ASCII characters, so encode to UTF-8 bytes first.
            driver.get("https://www.google.com/search?q=%s"
                       % urllib.quote(query.encode("utf-8")))
            source = driver.page_source
            # While the captcha page is displayed, wait for the user to solve
            # it in the browser window, polling the page once per second.
            while "CaptchaRedirect" in source:
                print("Enter captcha...")
                time.sleep(1)
                source = driver.page_source
            data = lxml.html.fromstring(source)

            # The stats element may be missing (no results / layout change)
            # and its .text may be None; both cases count as "0" results.
            stats = data.cssselect("#resultStats")
            stats_text = stats[0].text if stats else None
            nb_results = DIGITS_ONLY.sub('', stats_text or '') or "0"

            print("%s => %s" % (query, nb_results))
            RESULTS[index].append(nb_results)
    finally:
        driver.quit()
    
    
    
    
    # Overwrite the input CSV with the updated rows. The "with" block closes
    # (and flushes) the file even if writing a row fails.
    with open(sys.argv[1], "wb") as f:
        # Leading UTF-8 byte order mark, written before any CSV data.
        f.write(codecs.BOM_UTF8)
        w = unicodecsv.writer(f, encoding='utf-8')
        w.writerows(RESULTS)


    print("%s updated." % sys.argv[1])
    
    Pass the filename of your CSV file as the first command-line argument (the script reads it from sys.argv[1]).

    The first column is the query to run in Google (example: site:blackhatworld.com). Each time you run the script, it adds a new column with the date as its header and the number of results returned for each query. The first row is the header, so it won't be executed as a query.