用Python获取Google的下拉框自动完成提示文本
Python #关键字提取 2014-02-20 22:05
直接上代码了。
#!/usr/bin/python
# Copyright (C) 2010 <x01110011@gmail.com> http://yige.org/python/
import urllib2, sys, getopt, time
import socket
from urllib import quote
from xml.etree.ElementTree import XMLParser
from threading import Thread
# Start-up banner, emitted once when the module is executed or imported.
_BANNER_ROWS = (
    "\n***************************",
    "* Google Suggest Scrapper *",
    "* Coded by ____________ *",
    "* x01110011@gmail.com *",
    "***************************\n",
)
print("\n".join(_BANNER_ROWS))

# Shared module-level settings.  The running-thread counter ``i`` is not
# created here; startThreads() assigns it before any worker is started.
recursiveLevel = 0  # how many extra rounds of suggestions to follow (-r)
outputFile = ''     # path given with -o; empty string means "do not save"
def usage():
    """Print the command-line help text and terminate the program.

    This function never returns normally: it finishes with ``sys.exit()``,
    which raises ``SystemExit``.
    """
    help_rows = (
        "Usage: GoogleSuggest.py [options] \n",
        " -k: Keywords: Keywords to use (separated by #)",
        " -f: File: File to read keywords from (overrides -k)",
        " -r: Recursive level (0-5): Use suggested keywords to get more keywords up to -r times [Default: 0]",
        " -t: Threads: Number of threads (default 5)",
        " -o: Output file: Save keywords found to file\n\n",
        "Examples:\n",
        " GoogleSuggest.py -k keyword1",
        " GoogleSuggest.py -k keyword1#keyword2#keyword3 -r 1",
        " GoogleSuggest.py -f keyword_file.txt -o keywords_found.txt -t 10",
    )
    # One print per row, so each row keeps the newline that print appends.
    for row in help_rows:
        print(row)
    sys.exit()
class do(Thread):
    """Worker thread that gathers Google Suggest completions for one seed keyword.

    Every suggestion found is appended to ``self.result`` as
    ``[[queried_keyword], [suggestion]]``; the launcher reads that list
    after joining the thread.

    Reads the module globals ``recursiveLevel`` (number of extra rounds in
    which suggestions are themselves queried) and ``i`` (the launcher's
    count of started-but-unfinished workers, decremented when this worker
    ends).
    """

    def __init__(self, keyword):
        """Remember the seed *keyword* and start with an empty result list."""
        Thread.__init__(self)
        self.keyword = keyword
        self.result = []

    def run(self):
        """Query the seed keyword, then follow suggestions level by level.

        Level 0 queries the seed.  Each further level (up to
        ``recursiveLevel``) queries every suggestion produced by the level
        before it, skipping suggestions equal to the seed itself.

        NOTE(review): the statement nesting of this method was lost in the
        copy of the file under review; the traversal below implements what
        the ``-r`` help text describes ("use suggested keywords to get more
        keywords up to -r times").
        """
        global i
        try:
            # strip() rather than split()[0]: drop the trailing newline of
            # keywords read from a file without truncating multi-word
            # keywords to their first word.
            seed = self.keyword.strip()
            if not seed:
                return  # blank line in the keyword file: nothing to query
            frontier = self.getKeywords(seed)
            for level in range(1, recursiveLevel + 1):
                # Suggestions found on the last level are recorded in
                # self.result but not queried again.
                expand = level < recursiveLevel
                next_frontier = []
                for keyword in frontier:
                    if keyword == seed:
                        continue  # do not re-query the seed
                    found = self.getKeywords(keyword)
                    if expand:
                        # Accumulate across all keywords of this level, so
                        # the next level sees every suggestion, not only
                        # those of the last keyword queried.
                        next_frontier.extend(found)
                frontier = next_frontier
                time.sleep(0)  # give other threads a chance to run
        finally:
            # Always release this worker's slot, including when a request
            # raised; otherwise a launcher throttling on ``i`` would wait
            # forever.  NOTE: the update is not synchronised with other
            # threads.
            i = i - 1

    def getKeywords(self, keyword):
        """Fetch suggestions for *keyword*; record and return them.

        Prints the request URL, downloads the XML answer from the Google
        toolbar completion endpoint and reads the ``data`` attribute of the
        ``suggestion`` child of every ``CompleteSuggestion`` element.

        Returns the list of suggestion strings (possibly empty).  Errors
        raised by ``urllib2.urlopen`` or by the XML parser propagate to the
        caller.
        """
        query = keyword
        if not isinstance(query, str):
            # On Python 2 a non-ASCII suggestion comes back from ElementTree
            # as ``unicode``, which urllib.quote() cannot handle; percent-
            # encode its UTF-8 bytes instead.
            # NOTE(review): assumes the endpoint accepts UTF-8 queries.
            query = query.encode('utf-8')
        url = 'http://clients1.google.com/complete/search?output=toolbar&q=' + quote(query)
        print(url)
        response = urllib2.urlopen(url)
        try:
            cont = response.read()
        finally:
            response.close()  # do not leak the connection
        parser = XMLParser()
        parser.feed(cont)
        tree = parser.close()
        suggestions = []
        for entry in tree.findall('CompleteSuggestion'):
            node = entry.find('suggestion')
            if node is None:
                continue  # tolerate entries without a <suggestion> child
            data = node.get('data')
            if data is None:
                continue
            self.result.append([[keyword], [data]])
            suggestions.append(data)
        return suggestions
def startThreads(keywords):
    """Run one ``do`` worker per keyword, at most ``th`` at a time, then report.

    *keywords* is consumed from the front (the caller's list is empty
    afterwards).  When every worker has finished, their ``result`` lists
    are concatenated in start order and passed to ``output()``.

    Reads the module global ``th`` (thread limit) and resets the module
    global ``i``.  Ctrl+C while workers are being launched prints a notice
    and exits via ``sys.exit()``.
    """
    global i
    i = 0
    # ``th`` may still be the raw string taken from the command line; on
    # Python 2 comparing an int with a str never fails and is always
    # "smaller", which would silently disable the limit.
    limit = max(1, int(th))
    threads = []
    active = []
    while keywords:
        try:
            # Throttle on threads that are really still running, so a worker
            # that died without updating the shared counter cannot block
            # the launcher.
            active = [t for t in active if t.is_alive()]
            if len(active) < limit:
                keyword = keywords.pop(0)
                # ``i`` is still maintained because each worker decrements
                # it when it finishes.
                i = i + 1
                worker = do(keyword)
                worker.start()
                threads.append(worker)
                active.append(worker)
            else:
                # At the limit: wait briefly instead of spinning.
                time.sleep(0.05)
        except KeyboardInterrupt:
            print('Suspended by user...\n')
            sys.exit()
    ret = []
    for t in threads:
        t.join()
        ret.extend(t.result)
    output(ret)
def output(ret):
    """Print every collected suggestion and optionally save them to a file.

    *ret* is a list of ``[[keyword], [suggestion]]`` entries.  Each
    suggestion is printed on its own line.  If the module global
    ``outputFile`` is non-empty, the suggestions are also written to that
    path, one per line, replacing any previous content.

    *ret* is emptied as a side effect.  If the output file cannot be
    opened, a message is printed and the program exits via ``sys.exit()``.
    """
    lines = []
    for entry in ret:
        data = entry[1][0]
        print(data)
        line = data + '\n'
        if not isinstance(line, str):
            # Python 2: a ``unicode`` suggestion is implicitly ASCII-encoded
            # when written to a file and fails on non-ASCII text, so encode
            # it explicitly.
            line = line.encode('utf-8')
        lines.append(line)
    del ret[:]  # the list is consumed, so leave it empty for the caller
    if not outputFile:
        return
    try:
        f = open(outputFile, 'w')
    except IOError:
        print('Can\'t open output file\n')
        sys.exit()
    with f:  # closed even if the write fails
        f.writelines(lines)
def run(argv):
    """Parse the command-line options in *argv* and start the scrape.

    *argv* is the argument list without the program name.  Recognised
    options: ``-k`` keywords separated by ``#``, ``-f`` file with one
    keyword per line (takes precedence over ``-k``), ``-r`` recursive
    level, ``-t`` number of threads, ``-o`` output file.

    Sets the module globals ``th``, ``recursiveLevel`` and ``outputFile``
    and then hands the keyword list to ``startThreads()``.  Too few
    arguments, unknown options, non-numeric or out-of-range ``-r``/``-t``
    values, or no usable keyword all end in ``usage()``, which exits.  An
    unreadable keyword file prints a message and exits.
    """
    global th
    global recursiveLevel
    global outputFile
    th = 5
    # Look at the arguments we were given rather than at sys.argv: at least
    # one option together with its value is required.
    if len(argv) < 2:
        usage()
    try:
        opts, args = getopt.getopt(argv, 'k:f:r:t:o:')
    except getopt.GetoptError:
        usage()

    keyword_arg = None
    keyword_path = None
    for opt, arg in opts:
        if opt == '-k':
            keyword_arg = arg
        elif opt == '-f':
            keyword_path = arg
        elif opt == '-r':
            try:
                level = int(arg)
            except ValueError:
                usage()
            if level < 0:
                usage()
            recursiveLevel = level
        elif opt == '-t':
            # Store a number, not the raw option string, so the thread
            # limit can be compared with the running-thread count.
            try:
                count = int(arg)
            except ValueError:
                usage()
            if count < 1:
                usage()
            th = count
        elif opt == '-o':
            outputFile = arg

    if keyword_path is not None:
        try:
            handle = open(keyword_path, "r")
        except IOError:
            print('Can\'t open keywords file\n')
            sys.exit()
        with handle:
            candidates = handle.readlines()
    elif keyword_arg is not None:
        candidates = keyword_arg.split('#')
    else:
        # Neither -k nor -f was given.
        usage()

    # Drop line endings and blank entries so no worker is started for an
    # empty keyword.
    inputKeywords = [kw.strip() for kw in candidates if kw.strip()]
    if not inputKeywords:
        usage()
    startThreads(inputKeywords)
if __name__ == "__main__":
    # Script entry point: pass the command-line arguments (without the
    # program name) to run() and turn Ctrl+C into a clean exit.
    try:
        run(sys.argv[1:])
    except KeyboardInterrupt:
        print("Ctrl+C Exit By USER...\n")
        sys.exit()

# Related articles (page text that had been fused onto the sys.exit() line):
- Python日期操作 2012/12/25
- Python收发邮件 2012/11/27
- Python串口通信 2012/11/27
- 用Python来实现的adsl拨号 2012/11/25
- Python实现的命令行通讯录 2012/11/25
- Python中unicode码转utf8的方法 2012/11/25
- Python二叉树算法实现 2012/11/25
- Python实现的豆瓣电影信息查询 2012/11/25
- Python实现双倍超立方数 2012/11/25
- 用Python实现定时关机 2012/11/25