用Python获取Google的下拉框自动完成提示文本
Python #关键字提取 2014-02-20 22:05
直接上代码了。
#!/usr/bin/python
# Copyright (C) 2010 <x01110011@gmail.com> http://yige.org/python/
#
# Google Suggest scraper.
#
# Queries the Google "complete/search" toolbar endpoint for each seed keyword,
# optionally re-queries the returned suggestions for a number of levels, then
# prints every suggestion found and optionally writes them to a file.
#
# The code is written so that it runs unchanged on Python 2 and Python 3:
# every print() call takes exactly one string, and the urllib names are
# imported from whichever location exists.
import getopt
import socket
import sys
import time
from threading import Lock, Thread
from xml.etree.ElementTree import ParseError, XMLParser

try:  # Python 2 module layout
    import urllib2
    from urllib import quote
    urlopen = urllib2.urlopen
except ImportError:  # Python 3 module layout
    from urllib.parse import quote
    from urllib.request import urlopen

print("\n***************************")
print("* Google Suggest Scrapper *")
print("* Coded by ____________ *")
print("* x01110011@gmail.com *")
print("***************************\n")

# Shared state. run() overwrites th, recursiveLevel and outputFile from the
# command line; startThreads() resets i before launching workers.
i = 0                 # number of worker threads currently running
th = 5                # maximum number of concurrent worker threads
recursiveLevel = 0    # how many times suggestions are fed back as keywords
outputFile = ''       # path of the file to write suggestions to ('' = none)
counterLock = Lock()  # guards every update of i


def usage():
    """Print the command-line help and terminate the program."""
    print("Usage: GoogleSuggest.py [options] \n")
    print(" -k: Keywords: Keywords to use (separated by #)")
    print(" -f: File: File to read keywords from (overrides -k)")
    print(" -r: Recursive level (0-5): Use suggested keywords to get more keywords up to -r times [Default: 0]")
    print(" -t: Threads: Number of threads (default 5)")
    print(" -o: Output file: Save keywords found to file\n\n")
    print("Examples:\n")
    print(" GoogleSuggest.py -k keyword1")
    print(" GoogleSuggest.py -k keyword1#keyword2#keyword3 -r 1")
    print(" GoogleSuggest.py -f keyword_file.txt -o keywords_found.txt -t 10")
    sys.exit()


class do(Thread):
    """Worker thread that collects suggestions for one seed keyword.

    keyword -- the seed keyword; surrounding whitespace is ignored.
    levels  -- recursion depth for this worker; when None the module-level
               recursiveLevel is read at the time the worker runs.

    After the thread finishes, self.result holds one
    [[queried_keyword], [suggestion]] entry per suggestion received.
    """

    def __init__(self, keyword, levels=None):
        Thread.__init__(self)
        self.keyword = keyword
        self.levels = levels
        self.result = []

    def run(self):
        """Thread entry point: crawl, then release this worker's slot."""
        global i
        try:
            self.crawl()
        finally:
            # Always give the slot back. If a failure skipped this step,
            # startThreads() would wait forever for a free slot.
            with counterLock:
                i = i - 1

    def crawl(self):
        """Query the seed, then feed suggestions back in level by level."""
        levels = recursiveLevel if self.levels is None else self.levels
        # Keep the whole keyword (it may contain spaces); only trim the
        # padding and trailing newline left over from reading a file.
        seed = self.keyword.strip()
        if not seed or levels < 0:
            return
        queried = set([seed])
        frontier = self.getKeywords(seed)
        for level in range(1, levels + 1):
            nextFrontier = []
            for keyword in frontier:
                if keyword in queried:
                    continue  # never ask for the same keyword twice
                queried.add(keyword)
                found = self.getKeywords(keyword)
                if level < levels:
                    # Accumulate the suggestions of every keyword on this
                    # level, not just those of the last one queried.
                    nextFrontier.extend(found)
            frontier = nextFrontier
            time.sleep(0)  # yield to the other worker threads

    def fetch(self, keyword):
        """Download the raw suggestion XML for keyword.

        Returns the response body, or None when the request fails (the
        failure is reported on stderr).
        """
        if not isinstance(keyword, bytes):
            # quote() needs bytes to percent-encode non-ASCII text the same
            # way on Python 2 and Python 3.
            keyword = keyword.encode('utf-8')
        url = 'http://clients1.google.com/complete/search?output=toolbar&q=' + quote(keyword)
        print(url)
        try:
            response = urlopen(url)
            try:
                return response.read()
            finally:
                response.close()
        except (IOError, socket.error) as err:
            sys.stderr.write('Request failed for %s: %s\n' % (url, err))
            return None

    def getKeywords(self, keyword):
        """Return the suggestions for keyword and record them in self.result.

        Returns an empty list when the download fails or the response is not
        well-formed XML.
        """
        cont = self.fetch(keyword)
        if not cont:
            return []
        parser = XMLParser()
        try:
            parser.feed(cont)
            tree = parser.close()
        except ParseError as err:
            sys.stderr.write('Unreadable response for %r: %s\n' % (keyword, err))
            return []
        tmp = []
        for e in tree.findall('CompleteSuggestion'):
            suggestion = e.find('suggestion')
            if suggestion is None:
                continue
            data = suggestion.get('data')
            if data is None:
                continue
            self.result.append([[keyword], [data]])
            tmp.append(data)
        return tmp


def startThreads(keywords):
    """Run one worker per keyword, at most th at a time, then output results.

    keywords is consumed (emptied) as workers are started.
    """
    global i
    i = 0
    limit = max(1, int(th))
    threads = []
    ret = []
    try:
        while keywords:
            if i < limit:
                keyword = keywords.pop(0)
                with counterLock:
                    i = i + 1
                thread = do(keyword)
                thread.start()
                threads.append(thread)
            else:
                # All slots are busy: wait briefly instead of spinning.
                time.sleep(0.05)
    except KeyboardInterrupt:
        print('Suspended by user...\n')
        sys.exit()
    for t in threads:
        t.join()
        ret.extend(t.result)
    output(ret)


def output(ret):
    """Print every suggestion in ret and save them to outputFile if set.

    ret is a list of [[keyword], [suggestion]] entries; it is emptied by this
    call, as it was by the original implementation.
    """
    lines = []
    for entry in ret:
        data = entry[1][0]
        lines.append(data + '\n')
        print(data)
    del ret[:]
    if outputFile:
        try:
            f = open(outputFile, 'w')
        except IOError:
            print('Can\'t open output file\n')
            sys.exit()
        with f:
            f.writelines(lines)


def parseCount(text, minimum):
    """Convert a numeric option value to int; show usage if it is invalid."""
    try:
        value = int(text)
    except ValueError:
        value = None
    if value is None or value < minimum:
        usage()
    return value


def parseOptions(argv):
    """Parse the command-line arguments (without the program name).

    Returns (keywords, recursive_level, thread_count, output_path). Prints
    the usage text and exits when the arguments are missing or invalid, or
    when no keywords were supplied through -k or -f.
    """
    if len(argv) < 2:
        usage()
    try:
        opts, args = getopt.getopt(argv, 'k:f:r:t:o:')
    except getopt.GetoptError:
        usage()
    typedKeywords = None
    fileKeywords = None
    level = recursiveLevel
    threadCount = 5
    outPath = outputFile
    for opt, arg in opts:
        if opt == '-k':
            typedKeywords = arg.split('#')
        elif opt == '-f':
            try:
                with open(arg, 'r') as inputFile:
                    fileKeywords = inputFile.readlines()
            except IOError:
                print('Can\'t open keywords file\n')
                sys.exit()
        elif opt == '-r':
            level = parseCount(arg, 0)
        elif opt == '-t':
            # Must be an int: a string here would defeat the i < th check.
            threadCount = parseCount(arg, 1)
        elif opt == '-o':
            outPath = arg
    # A keyword file takes precedence over -k, as stated in the usage text.
    keywords = fileKeywords if fileKeywords is not None else typedKeywords
    if keywords is None:
        usage()
    return keywords, level, threadCount, outPath


def run(argv):
    """Configure the module globals from argv and start the scrape."""
    global th
    global recursiveLevel
    global outputFile
    inputKeywords, recursiveLevel, th, outputFile = parseOptions(argv)
    startThreads(inputKeywords)


if __name__ == "__main__":
    try:
        run(sys.argv[1:])
    except KeyboardInterrupt:
        print("Ctrl+C Exit By USER...\n")
        sys.exit()
相关文章
- Python日期操作 2012/12/25
- Python收发邮件 2012/11/27
- Python串口通信 2012/11/27
- 用Python来实现的adsl拨号 2012/11/25
- Python实现的命令行通讯录 2012/11/25
- Python中unicode码转utf8的方法 2012/11/25
- Python二叉树算法实现 2012/11/25
- Python实现的豆瓣电影信息查询 2012/11/25
- Python实现双倍超立方数 2012/11/25
- 用Python实现定时关机 2012/11/25