From bef8106d0f22280546087dc3cd18d5d5dcdd9804 Mon Sep 17 00:00:00 2001 From: DoumanAsh Date: Mon, 6 Apr 2015 08:35:29 +0300 Subject: [PATCH 1/5] [search engine] Nova2 multiprocessing --- src/searchengine/nova/nova2.py | 224 +++++++++++++++++--------------- src/searchengine/nova3/nova2.py | 219 +++++++++++++++++-------------- 2 files changed, 239 insertions(+), 204 deletions(-) diff --git a/src/searchengine/nova/nova2.py b/src/searchengine/nova/nova2.py index 4bbeafbbd..cef9681b7 100644 --- a/src/searchengine/nova/nova2.py +++ b/src/searchengine/nova/nova2.py @@ -26,7 +26,7 @@ # POSSIBILITY OF SUCH DAMAGE. -#VERSION: 1.32 +#VERSION: 1.40 # Author: # Fabien Devaux @@ -37,16 +37,15 @@ # # Licence: BSD -import sys -import threading -import os -import glob import urllib - -import fix_encoding +from os import path +from glob import glob +from sys import argv +from multiprocessing import Pool, cpu_count +from fix_encoding import fix_encoding THREADED = True -CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') +CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'} ################################################################################ # Every engine should have a "search" method taking @@ -56,108 +55,125 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic # As a convention, try to list results by decrasing number of seeds or similar ################################################################################ -supported_engines = [] +def initialize_engines(): + """ Import available engines -engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) -for engine in engines: - e = engine.split(os.sep)[-1][:-3] - if len(e.strip()) == 0: continue - if e.startswith('_'): continue - try: - exec "from engines.%s import %s"%(e,e) - supported_engines.append(e) - except: - pass + Return list of available engines + """ + supported_engines = [] -def engineToXml(short_name): - xml = "<%s>\n"%short_name - exec "engine = %s()"%short_name - xml += "%s\n"%engine.name - xml += "%s\n"%engine.url - xml += "" - if hasattr(engine, 'supported_categories'): - supported_categories = engine.supported_categories.keys() - supported_categories.remove('all') - xml += " ".join(supported_categories) - xml += "\n" - xml += "\n"%short_name - return xml + engines = glob(path.join(path.dirname(__file__), 'engines', '*.py')) + for engine in engines: + engi = path.basename(engine).split('.')[0].strip() + if len(engi) == 0 or engi.startswith('_'): + continue + try: + #import engines.[engine] + engine_module = __import__(".".join(("engines", engi))) + #get low-level module + engine_module = getattr(engine_module, engi) + #bind class name + globals()[engi] = getattr(engine_module, engi) + supported_engines.append(engi) + except: + pass -def displayCapabilities(): - """ - Display capabilities in XML format - - - long name - http://example.com - movies music games - - - """ - xml = "" - for short_name in supported_engines: - xml += engineToXml(short_name) - xml += "" - print xml + return supported_engines -class EngineLauncher(threading.Thread): - def __init__(self, engine, what, cat='all'): - threading.Thread.__init__(self) - self.engine = engine - self.what = what - self.cat = cat - def run(self): - if hasattr(self.engine, 'supported_categories'): - if self.cat == 'all' or self.cat in self.engine.supported_categories.keys(): - self.engine.search(self.what, self.cat) - elif self.cat == 'all': - self.engine.search(self.what) +def engines_to_xml(supported_engines): + """ Generates xml for supported engines """ + tab = " " * 4 -if __name__ == '__main__': - # Make sure we enforce utf-8 encoding - fix_encoding.fix_encoding() + for short_name in supported_engines: + search_engine = globals()[short_name]() - if len(sys.argv) < 2: - raise SystemExit('./nova2.py [all|engine1[,engine2]*] \navailable engines: %s'% - (','.join(supported_engines))) + supported_categories = "" + if hasattr(search_engine, "supported_categories"): + supported_categories = " ".join((key for key in search_engine.supported_categories.keys() + if key is not "all")) - if len(sys.argv) == 2: - if sys.argv[1] == "--capabilities": - displayCapabilities() - sys.exit(0) - else: - raise SystemExit('./nova.py [all|engine1[,engine2]*] \navailable engines: %s'% - (','.join(supported_engines))) + yield "".join((tab, "<", short_name, ">\n", + tab, tab, "", search_engine.name, "\n", + tab, tab, "", search_engine.url, "\n", + tab, tab, "", supported_categories, "\n", + tab, "\n")) - engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] +def displayCapabilities(supported_engines): + """ + Display capabilities in XML format + + + long name + http://example.com + movies music games + + + """ + xml = "".join(("\n", + "".join(engines_to_xml(supported_engines)), + "")) + print(xml) - if 'all' in engines_list: - engines_list = supported_engines - - cat = sys.argv[2].lower() - - if cat not in CATEGORIES: - raise SystemExit('Invalid category!') - - what = urllib.quote(' '.join(sys.argv[3:])) - - threads = [] - for engine in engines_list: - try: - if THREADED: - exec "l = EngineLauncher(%s(), what, cat)"%engine - threads.append(l) - l.start() - else: - exec "e = %s()"%engine - if hasattr(engine, 'supported_categories'): - if cat == 'all' or cat in e.supported_categories.keys(): - e.search(what, cat) - elif self.cat == 'all': - e.search(what) - engine().search(what, cat) - except: - pass - if THREADED: - for t in threads: - t.join() +def run_search(engine_list): + """ Run search in engine + + @retval False if any exceptions occured + @retval True otherwise + """ + engine, what, cat = engine_list + try: + engine = engine() + #avoid exceptions due to invalid category + if hasattr(engine, 'supported_categories'): + cat = cat if cat in engine.supported_categories else "all" + engine.search(what, cat) + else: + engine.search(what) + return True + except: + return False + +def main(args): + fix_encoding() + supported_engines = initialize_engines() + + if not args: + raise SystemExit("./nova2.py [all|engine1[,engine2]*] \n" + "available engines: %s" % (','.join(supported_engines))) + + elif args[0] == "--capabilities": + displayCapabilities(supported_engines) + return + + elif len(args) < 3: + raise SystemExit("./nova2.py [all|engine1[,engine2]*] \n" + "available engines: %s" % (','.join(supported_engines))) + + engines_list = set(e.lower() for e in args[0].strip().split(',')) + + if 'all' in engines_list: + engines_list = supported_engines + else: + #discard un-supported engines + engines_list = [engine for engine in engines_list + if engine in supported_engines] + + if not engines_list: + #engine list is empty. Nothing to do here + return + + cat = args[1].lower() + + if cat not in CATEGORIES: + raise SystemExit(" - ".join(('Invalid category', cat))) + + what = urllib.quote(' '.join(args[2:])) + + if THREADED: + pool = Pool(min(len(engines_list), cpu_count())) + pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list)) + else: + _ = [run_search([globals()[engine], what, cat]) for engine in engines_list] + +if __name__ == "__main__": + main(argv[1:]) diff --git a/src/searchengine/nova3/nova2.py b/src/searchengine/nova3/nova2.py index 0e064cf42..c67852db3 100644 --- a/src/searchengine/nova3/nova2.py +++ b/src/searchengine/nova3/nova2.py @@ -26,7 +26,7 @@ # POSSIBILITY OF SUCH DAMAGE. -#VERSION: 1.24 +#VERSION: 1.40 # Author: # Fabien Devaux @@ -37,14 +37,14 @@ # # Licence: BSD -import sys -import threading -import os -import glob import urllib.parse +from os import path, cpu_count +from glob import glob +from sys import argv +from multiprocessing import Pool THREADED = True -CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books') +CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'} ################################################################################ # Every engine should have a "search" method taking @@ -54,105 +54,124 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic # As a convention, try to list results by decrasing number of seeds or similar ################################################################################ -supported_engines = [] +def initialize_engines(): + """ Import available engines -engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) -for engine in engines: - e = engine.split(os.sep)[-1][:-3] - if len(e.strip()) == 0: continue - if e.startswith('_'): continue - try: - exec("from engines.%s import %s"%(e,e)) - supported_engines.append(e) - except: - pass + Return list of available engines + """ + supported_engines = [] -def engineToXml(short_name): - xml = "<%s>\n"%short_name - exec("search_engine = %s()"%short_name, globals()) - xml += "%s\n"%search_engine.name - xml += "%s\n"%search_engine.url - xml += "" - if hasattr(search_engine, 'supported_categories'): - supported_categories = list(search_engine.supported_categories.keys()) - supported_categories.remove('all') - xml += " ".join(supported_categories) - xml += "\n" - xml += "\n"%short_name - return xml + engines = glob(path.join(path.dirname(__file__), 'engines', '*.py')) + for engine in engines: + engi = path.basename(engine).split('.')[0].strip() + if len(engi) == 0 or engi.startswith('_'): + continue + try: + #import engines.[engine] + engine_module = __import__(".".join(("engines", engi))) + #get low-level module + engine_module = getattr(engine_module, engi) + #bind class name + globals()[engi] = getattr(engine_module, engi) + supported_engines.append(engi) + except: + pass -def displayCapabilities(): - """ - Display capabilities in XML format - - - long name - http://example.com - movies music games - - - """ - xml = "" - for short_name in supported_engines: - xml += engineToXml(short_name) - xml += "" - print(xml) + return supported_engines -class EngineLauncher(threading.Thread): - def __init__(self, engine, what, cat='all'): - threading.Thread.__init__(self) - self.engine = engine - self.what = what - self.cat = cat - def run(self): - if hasattr(self.engine, 'supported_categories'): - if self.cat == 'all' or self.cat in list(self.engine.supported_categories.keys()): - self.engine.search(self.what, self.cat) - elif self.cat == 'all': - self.engine.search(self.what) +def engines_to_xml(supported_engines): + """ Generates xml for supported engines """ + tab = " " * 4 -if __name__ == '__main__': - if len(sys.argv) < 2: - raise SystemExit('./nova2.py [all|engine1[,engine2]*] \navailable engines: %s'% - (','.join(supported_engines))) + for short_name in supported_engines: + search_engine = globals()[short_name]() - if len(sys.argv) == 2: - if sys.argv[1] == "--capabilities": - displayCapabilities() - sys.exit(0) - else: - raise SystemExit('./nova.py [all|engine1[,engine2]*] \navailable engines: %s'% - (','.join(supported_engines))) + supported_categories = "" + if hasattr(search_engine, "supported_categories"): + supported_categories = " ".join((key for key in search_engine.supported_categories.keys() + if key is not "all")) - engines_list = [e.lower() for e in sys.argv[1].strip().split(',')] + yield "".join((tab, "<", short_name, ">\n", + tab, tab, "", search_engine.name, "\n", + tab, tab, "", search_engine.url, "\n", + tab, tab, "", supported_categories, "\n", + tab, "\n")) - if 'all' in engines_list: - engines_list = supported_engines - - cat = sys.argv[2].lower() - - if cat not in CATEGORIES: - raise SystemExit('Invalid category!') - - what = urllib.parse.quote(' '.join(sys.argv[3:])) - - threads = [] - for engine in engines_list: - try: - if THREADED: - exec("l = EngineLauncher(%s(), what, cat)"%engine) - threads.append(l) - l.start() - else: - exec("e = %s()"%engine) - if hasattr(engine, 'supported_categories'): - if cat == 'all' or cat in list(e.supported_categories.keys()): - e.search(what, cat) - elif self.cat == 'all': - e.search(what) - engine().search(what, cat) - except: - pass - if THREADED: - for t in threads: - t.join() +def displayCapabilities(supported_engines): + """ + Display capabilities in XML format + + + long name + http://example.com + movies music games + + + """ + xml = "".join(("\n", + "".join(engines_to_xml(supported_engines)), + "")) + print(xml) + +def run_search(engine_list): + """ Run search in engine + + @retval False if any exceptions occured + @retval True otherwise + """ + engine, what, cat = engine_list + try: + engine = engine() + #avoid exceptions due to invalid category + if hasattr(engine, 'supported_categories'): + cat = cat if cat in engine.supported_categories else "all" + engine.search(what, cat) + else: + engine.search(what) + return True + except: + return False + +def main(args): + supported_engines = initialize_engines() + + if not args: + raise SystemExit("./nova2.py [all|engine1[,engine2]*] \n" + "available engines: %s" % (','.join(supported_engines))) + + elif args[0] == "--capabilities": + displayCapabilities(supported_engines) + return + + elif len(args) < 3: + raise SystemExit("./nova2.py [all|engine1[,engine2]*] \n" + "available engines: %s" % (','.join(supported_engines))) + + engines_list = set(e.lower() for e in args[0].strip().split(',')) + + if 'all' in engines_list: + engines_list = supported_engines + else: + #discard un-supported engines + engines_list = [engine for engine in engines_list + if engine in supported_engines] + + if not engines_list: + #engine list is empty. Nothing to do here + return + + cat = args[1].lower() + + if cat not in CATEGORIES: + raise SystemExit(" - ".join(('Invalid category', cat))) + + what = urllib.parse.quote(' '.join(args[2:])) + + if THREADED: + with Pool(min(len(engines_list), cpu_count())) as pool: + pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list)) + else: + _ = [run_search([globals()[engine], what, cat]) for engine in engines_list] + +if __name__ == "__main__": + main(argv[1:]) From d6d0f422f58193acdef698c11f8e644753454efa Mon Sep 17 00:00:00 2001 From: DoumanAsh Date: Mon, 6 Apr 2015 08:36:41 +0300 Subject: [PATCH 2/5] [search engine] engines update --- src/searchengine/nova/engines/extratorrent.py | 205 +++++++++++------- .../nova/engines/legittorrents.py | 6 +- src/searchengine/nova/engines/mininova.py | 194 ++++++++++------- .../nova/engines/torrentreactor.py | 154 ++++++------- src/searchengine/nova/engines/versions.txt | 9 +- .../nova3/engines/extratorrent.py | 205 +++++++++++------- .../nova3/engines/legittorrents.py | 6 +- src/searchengine/nova3/engines/mininova.py | 194 ++++++++++------- .../nova3/engines/torrentreactor.py | 153 ++++++------- src/searchengine/nova3/engines/versions.txt | 9 +- 10 files changed, 644 insertions(+), 491 deletions(-) diff --git a/src/searchengine/nova/engines/extratorrent.py b/src/searchengine/nova/engines/extratorrent.py index 2956406f4..19fce553c 100644 --- a/src/searchengine/nova/engines/extratorrent.py +++ b/src/searchengine/nova/engines/extratorrent.py @@ -1,4 +1,4 @@ -#VERSION: 1.2 +#VERSION: 2.0 #AUTHORS: Christophe Dumez (chris@qbittorrent.org) # Redistribution and use in source and binary forms, with or without @@ -25,92 +25,135 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. - +from HTMLParser import HTMLParser +from httplib import HTTPConnection as http +#qBt from novaprinter import prettyPrinter -from helpers import retrieve_url, download_file -import sgmllib -import re +from helpers import download_file class extratorrent(object): - url = 'http://extratorrent.cc' - name = 'extratorrent' - supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'} + """ Search engine class """ + url = 'http://extratorrent.cc' + name = 'ExtraTorrent' + supported_categories = {'all' : '0', + 'movies' : '4', + 'tv' : '8', + 'music' : '5', + 'games' : '3', + 'anime' : '1', + 'software' : '7', + 'books' : '2', + 'pictures' : '6'} - def __init__(self): - self.results = [] - self.parser = self.SimpleSGMLParser(self.results, self.url) + def download_torrent(self, info): + """ Downloader """ + print(download_file(info)) - def download_torrent(self, info): - print download_file(info) + class MyHtmlParseWithBlackJack(HTMLParser): + """ Parser class """ + def __init__(self, list_searches, url): + HTMLParser.__init__(self) + self.url = url + self.list_searches = list_searches + self.current_item = None + self.cur_item_name = None + self.pending_size = False + self.next_queries = True + self.pending_next_queries = False - class SimpleSGMLParser(sgmllib.SGMLParser): - def __init__(self, results, url, *args): - sgmllib.SGMLParser.__init__(self) - self.url = url - self.td_counter = None - self.current_item = None - self.start_name = False - self.results = results - - def start_a(self, attr): - params = dict(attr) - #print params - if params.has_key('href') and params['href'].startswith("/torrent_download/"): - self.current_item = {} - self.td_counter = 0 - self.start_name = False - torrent_id = '/'.join(params['href'].split('/')[2:]) - self.current_item['link']=self.url+'/download/'+torrent_id - elif params.has_key('href') and params['href'].startswith("/torrent/") and params['href'].endswith(".html"): - self.current_item['desc_link'] = self.url + params['href'].strip() - self.start_name = True - - def handle_data(self, data): - if self.td_counter == 2: - if not self.current_item.has_key('name') and self.start_name: - self.current_item['name'] = data.strip() - elif self.td_counter == 3: - if not self.current_item.has_key('size'): - self.current_item['size'] = '' - self.current_item['size']+= data.replace(" ", " ").strip() - elif self.td_counter == 4: - if not self.current_item.has_key('seeds'): - self.current_item['seeds'] = '' - self.current_item['seeds']+= data.strip() - elif self.td_counter == 5: - if not self.current_item.has_key('leech'): - self.current_item['leech'] = '' - self.current_item['leech']+= data.strip() - - def start_td(self,attr): - if isinstance(self.td_counter,int): - self.td_counter += 1 - if self.td_counter > 5: - self.td_counter = None - # Display item + def handle_starttag(self, tag, attrs): if self.current_item: - self.current_item['engine_url'] = self.url - if not self.current_item['seeds'].isdigit(): - self.current_item['seeds'] = 0 - if not self.current_item['leech'].isdigit(): - self.current_item['leech'] = 0 - prettyPrinter(self.current_item) - self.results.append('a') + if tag == "a": + params = dict(attrs) + link = params['href'] - def search(self, what, cat='all'): - ret = [] - i = 1 - while True and i<11: - results = [] - parser = self.SimpleSGMLParser(results, self.url) - dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i)) - results_re = re.compile('(?s).*') - for match in results_re.finditer(dat): - res_tab = match.group(0) - parser.feed(res_tab) + if not link.startswith("/torrent"): + return + + if link[8] == "/": + #description + self.current_item["desc_link"] = "".join((self.url, link)) + #remove view at the beginning + self.current_item["name"] = params["title"][5:] + self.pending_size = True + elif link[8] == "_": + #download link + link = link.replace("torrent_", "", 1) + self.current_item["link"] = "".join((self.url, link)) + + elif tag == "td": + if self.pending_size: + self.cur_item_name = "size" + self.current_item["size"] = "" + self.pending_size = False + + for attr in attrs: + if attr[0] == "class": + if attr[1][0] == "s": + self.cur_item_name = "seeds" + self.current_item["seeds"] = "" + elif attr[1][0] == "l": + self.cur_item_name = "leech" + self.current_item["leech"] = "" + break + + + elif tag == "tr": + for attr in attrs: + if attr[0] == "class" and attr[1].startswith("tl"): + self.current_item = dict() + self.current_item["engine_url"] = self.url + break + + elif self.pending_next_queries: + if tag == "a": + params = dict(attrs) + self.list_searches.append(params['href']) + if params["title"] == "10": + self.pending_next_queries = False + else: + self.pending_next_queries = False + + elif self.next_queries: + if tag == "b" and ("class", "pager_no_link") in attrs: + self.next_queries = False + self.pending_next_queries = True + + def handle_data(self, data): + if self.cur_item_name: + temp = self.current_item[self.cur_item_name] + self.current_item[self.cur_item_name] = " ".join((temp, data)) + #Due to utf-8 we need to handle data two times if there is space + if not self.cur_item_name == "size": + self.cur_item_name = None + + def handle_endtag(self, tag): + if self.current_item: + if tag == "tr": + prettyPrinter(self.current_item) + self.current_item = None + + def search(self, what, cat="all"): + """ Performs search """ + connection = http("extratorrent.cc") + + query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat])) + + connection.request("GET", query) + response = connection.getresponse() + if response.status != 200: + return + + list_searches = [] + parser = self.MyHtmlParseWithBlackJack(list_searches, self.url) + parser.feed(response.read().decode('utf-8')) parser.close() - break - if len(results) <= 0: - break - i += 1 - + + for search_query in list_searches: + connection.request("GET", search_query) + response = connection.getresponse() + parser.feed(response.read().decode('utf-8')) + parser.close() + + connection.close() + return diff --git a/src/searchengine/nova/engines/legittorrents.py b/src/searchengine/nova/engines/legittorrents.py index be083053e..a6b9b6f18 100644 --- a/src/searchengine/nova/engines/legittorrents.py +++ b/src/searchengine/nova/engines/legittorrents.py @@ -1,4 +1,4 @@ -#VERSION: 1.02 +#VERSION: 1.03 #AUTHORS: Christophe Dumez (chris@qbittorrent.org) # Redistribution and use in source and binary forms, with or without @@ -36,10 +36,6 @@ class legittorrents(object): name = 'legittorrents' supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'} - def __init__(self): - self.results = [] - self.parser = self.SimpleSGMLParser(self.results, self.url) - def download_torrent(self, info): print download_file(info) diff --git a/src/searchengine/nova/engines/mininova.py b/src/searchengine/nova/engines/mininova.py index 5355b0ec7..dc132cd6c 100644 --- a/src/searchengine/nova/engines/mininova.py +++ b/src/searchengine/nova/engines/mininova.py @@ -1,4 +1,4 @@ -#VERSION: 1.51 +#VERSION: 2.00 #AUTHORS: Christophe Dumez (chris@qbittorrent.org) #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com) @@ -26,90 +26,124 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. +from HTMLParser import HTMLParser +from httplib import HTTPConnection as http from novaprinter import prettyPrinter -from helpers import retrieve_url, download_file -import sgmllib -import re +from helpers import download_file class mininova(object): - # Mandatory properties - url = 'http://www.mininova.org' - name = 'Mininova' - supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'} + """ Search engine class """ + url = 'http://www.mininova.org' + name = 'Mininova' + supported_categories = {'all' : '0', + 'movies' : '4', + 'tv' : '8', + 'music' : '5', + 'games' : '3', + 'anime' : '1', + 'software' : '7', + 'pictures' : '6', + 'books' : '2'} - def __init__(self): - self.results = [] - self.parser = self.SimpleSGMLParser(self.results, self.url) + def download_torrent(self, info): + print(download_file(info)) - def download_torrent(self, info): - print download_file(info) + class MyHtmlParseWithBlackJack(HTMLParser): + """ Parser class """ + def __init__(self, list_searches, url): + HTMLParser.__init__(self) + self.list_searches = list_searches + self.url = url + self.table_results = False + self.current_item = None + self.cur_item_name = None + self.next_queries = True - class SimpleSGMLParser(sgmllib.SGMLParser): - def __init__(self, results, url, *args): - sgmllib.SGMLParser.__init__(self) - self.url = url - self.td_counter = None - self.current_item = None - self.results = results - - def start_a(self, attr): - params = dict(attr) - #print params - if params.has_key('href'): - if params['href'].startswith("/get/"): - self.current_item = {} - self.td_counter = 0 - self.current_item['link']=self.url+params['href'].strip() - elif params['href'].startswith("/tor/") and self.current_item is not None: - self.current_item['desc_link']=self.url+params['href'].strip() - - def handle_data(self, data): - if self.td_counter == 0: - if not self.current_item.has_key('name'): - self.current_item['name'] = '' - self.current_item['name']+= data - elif self.td_counter == 1: - if not self.current_item.has_key('size'): - self.current_item['size'] = '' - self.current_item['size']+= data.strip() - elif self.td_counter == 2: - if not self.current_item.has_key('seeds'): - self.current_item['seeds'] = '' - self.current_item['seeds']+= data.strip() - elif self.td_counter == 3: - if not self.current_item.has_key('leech'): - self.current_item['leech'] = '' - self.current_item['leech']+= data.strip() - - def start_td(self,attr): - if isinstance(self.td_counter,int): - self.td_counter += 1 - if self.td_counter > 4: - self.td_counter = None - # Display item - if self.current_item: - self.current_item['engine_url'] = self.url - if not self.current_item['seeds'].isdigit(): - self.current_item['seeds'] = 0 - if not self.current_item['leech'].isdigit(): - self.current_item['leech'] = 0 - prettyPrinter(self.current_item) - self.results.append('a') + def handle_starttag_tr(self, _): + """ Handler of tr start tag """ + self.current_item = dict() - def search(self, what, cat='all'): - ret = [] - i = 1 - while True and i<11: - results = [] - parser = self.SimpleSGMLParser(results, self.url) - dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i)) - results_re = re.compile('(?s)

Search results for.*') - for match in results_re.finditer(dat): - res_tab = match.group(0) - parser.feed(res_tab) + def handle_starttag_a(self, attrs): + """ Handler of a start tag """ + params = dict(attrs) + link = params["href"] + + if link.startswith("/get/"): + #download link + self.current_item["link"] = "".join((self.url, link)) + elif link.startswith("/tor/"): + #description + self.current_item["desc_link"] = "".join((self.url, link)) + self.cur_item_name = "name" + self.current_item["name"] = "" + elif self.next_queries and link.startswith("/search"): + if params["title"].startswith("Page"): + self.list_searches.append(link) + + def handle_starttag_td(self, attrs): + """ Handler of td start tag """ + if ("align", "right") in attrs: + if not "size" in self.current_item.keys(): + self.cur_item_name = "size" + self.current_item["size"] = "" + + def handle_starttag_span(self, attrs): + """ Handler of span start tag """ + if ("class", "g") in attrs: + self.cur_item_name = "seeds" + self.current_item["seeds"] = "" + elif ("class", "b") in attrs: + self.cur_item_name = "leech" + self.current_item["leech"] = "" + + def handle_starttag(self, tag, attrs): + """ Parser's start tag handler """ + if self.table_results: + dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None) + if dispatcher: + dispatcher(attrs) + + elif tag == "table": + self.table_results = ("class", "maintable") in attrs + + def handle_endtag(self, tag): + """ Parser's end tag handler """ + if tag == "tr" and self.current_item: + self.current_item["engine_url"] = self.url + prettyPrinter(self.current_item) + self.current_item = None + elif self.cur_item_name: + if tag == "a" or tag == "span": + self.cur_item_name = None + + def handle_data(self, data): + """ Parser's data handler """ + if self.cur_item_name: + temp = self.current_item[self.cur_item_name] + self.current_item[self.cur_item_name] = " ".join((temp, data)) + + def search(self, what, cat="all"): + """ Performs search """ + connection = http("www.mininova.org") + + query = "/".join(("/search", what, self.supported_categories[cat], "seeds")) + + connection.request("GET", query) + response = connection.getresponse() + if response.status != 200: + return + + list_searches = [] + parser = self.MyHtmlParseWithBlackJack(list_searches, self.url) + parser.feed(response.read().decode('utf-8')) parser.close() - break - if len(results) <= 0: - break - i += 1 - + + parser.next_queries = False + for search_query in list_searches: + connection.request("GET", search_query) + response = connection.getresponse() + parser.feed(response.read().decode('utf-8')) + parser.close() + + connection.close() + return diff --git a/src/searchengine/nova/engines/torrentreactor.py b/src/searchengine/nova/engines/torrentreactor.py index ee74f4e75..dff7d35f2 100644 --- a/src/searchengine/nova/engines/torrentreactor.py +++ b/src/searchengine/nova/engines/torrentreactor.py @@ -1,4 +1,4 @@ -#VERSION: 1.33 +#VERSION: 1.35 #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net) #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) # Bruno Barbieri (brunorex@gmail.com) @@ -28,92 +28,94 @@ # POSSIBILITY OF SUCH DAMAGE. from novaprinter import prettyPrinter -from helpers import retrieve_url, download_file -from urllib2 import HTTPError -from HTMLParser import HTMLParser +from helpers import download_file import urllib +from HTMLParser import HTMLParser +from httplib import HTTPConnection as http import re class torrentreactor(object): - url = 'http://www.torrentreactor.net' - name = 'TorrentReactor.Net' - supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'} + url = 'http://www.torrentreactor.net' + name = 'TorrentReactor.Net' + supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'} - def download_torrent(self, info): - print download_file(info) + def download_torrent(self, info): + print(download_file(info)) - class SimpleHTMLParser(HTMLParser): - def __init__(self, results, url, *args): - HTMLParser.__init__(self) - self.td_counter = None - self.current_item = None - self.results = results - self.id = None - self.url = url - self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td } + class SimpleHTMLParser(HTMLParser): + def __init__(self, results, url, *args): + HTMLParser.__init__(self) + self.td_counter = None + self.current_item = None + self.results = results + self.id = None + self.url = url + self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td } - def handle_starttag(self, tag, attrs): - if tag in self.dispatcher: - self.dispatcher[tag](attrs) + def handle_starttag(self, tag, attrs): + if tag in self.dispatcher: + self.dispatcher[tag](attrs) - def start_a(self, attr): - params = dict(attr) - if re.match("/torrents/\d+.*", params['href']): - self.current_item = {} - self.current_item['desc_link'] = self.url+params['href'].strip() - elif 'torrentreactor.net/download.php' in params['href']: - self.td_counter = 0 - self.current_item['link'] = params['href'].strip() - self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1]) + def start_a(self, attr): + params = dict(attr) + if re.match("/torrents/\d+.*", params['href']): + self.current_item = {} + self.current_item['desc_link'] = self.url+params['href'].strip() + elif 'torrentreactor.net/download.php' in params['href']: + self.td_counter = 0 + self.current_item['link'] = params['href'].strip() + self.current_item['name'] = urllib.unquote_plus(params['href'].split('&')[1].split('name=')[1]) - def handle_data(self, data): - if self.td_counter == 1: - if not self.current_item.has_key('size'): - self.current_item['size'] = '' - self.current_item['size']+= data.strip() - elif self.td_counter == 2: - if not self.current_item.has_key('seeds'): - self.current_item['seeds'] = '' - self.current_item['seeds']+= data.strip() - elif self.td_counter == 3: - if not self.current_item.has_key('leech'): - self.current_item['leech'] = '' - self.current_item['leech']+= data.strip() + def handle_data(self, data): + if self.td_counter == 1: + if 'size' not in self.current_item: + self.current_item['size'] = '' + self.current_item['size']+= data.strip() + elif self.td_counter == 2: + if 'seeds' not in self.current_item: + self.current_item['seeds'] = '' + self.current_item['seeds']+= data.strip() + elif self.td_counter == 3: + if 'leech' not in self.current_item: + self.current_item['leech'] = '' + self.current_item['leech']+= data.strip() - def start_td(self,attr): - if isinstance(self.td_counter,int): - self.td_counter += 1 - if self.td_counter > 3: - self.td_counter = None - # add item to results - if self.current_item: - self.current_item['engine_url'] = self.url - if not self.current_item['seeds'].isdigit(): - self.current_item['seeds'] = 0 - if not self.current_item['leech'].isdigit(): - self.current_item['leech'] = 0 - prettyPrinter(self.current_item) - self.has_results = True - self.results.append('a') + def start_td(self,attr): + if isinstance(self.td_counter,int): + self.td_counter += 1 + if self.td_counter > 3: + self.td_counter = None + # add item to results + if self.current_item: + self.current_item['engine_url'] = self.url + if not self.current_item['seeds'].isdigit(): + self.current_item['seeds'] = 0 + if not self.current_item['leech'].isdigit(): + self.current_item['leech'] = 0 + prettyPrinter(self.current_item) + self.has_results = True + self.results.append('a') - def __init__(self): - self.results = [] - self.parser = self.SimpleHTMLParser(self.results, self.url) + def search(self, what, cat='all'): + i = 0 + dat = '' + connection = http("www.torrentreactor.net") - def search(self, what, cat='all'): - i = 0 - dat = '' - while True and i<11: - results = [] - parser = self.SimpleHTMLParser(results, self.url) + while True and i<11: + results = [] + parser = self.SimpleHTMLParser(results, self.url) + query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]) + connection.request("GET", query) + response = connection.getresponse() + if response.status != 200: + break - try: - dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])) - except HTTPError: - break + dat = response.read().decode('utf-8') - parser.feed(dat) - parser.close() - if len(results) <= 0: - break - i += 1 + parser.feed(dat) + parser.close() + if len(results) <= 0: + break + i += 1 + + connection.close() diff --git a/src/searchengine/nova/engines/versions.txt b/src/searchengine/nova/engines/versions.txt index d581a676f..77fb875d5 100644 --- a/src/searchengine/nova/engines/versions.txt +++ b/src/searchengine/nova/engines/versions.txt @@ -1,8 +1,9 @@ -torrentreactor: 1.33 -mininova: 1.51 -piratebay: 2.11 extratorrent: 1.2 +torrentreactor: 1.35 +mininova: 2.00 +piratebay: 2.11 +extratorrent: 2.0 kickasstorrents: 1.26 btdigg: 1.24 -legittorrents: 1.02 torrentz: 2.13 +legittorrents: 1.03 diff --git a/src/searchengine/nova3/engines/extratorrent.py b/src/searchengine/nova3/engines/extratorrent.py index df1ef9b24..de3dcb9a2 100644 --- a/src/searchengine/nova3/engines/extratorrent.py +++ b/src/searchengine/nova3/engines/extratorrent.py @@ -1,4 +1,4 @@ -#VERSION: 1.2 +#VERSION: 2.0 #AUTHORS: Christophe Dumez (chris@qbittorrent.org) # Redistribution and use in source and binary forms, with or without @@ -25,92 +25,135 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. - +from html.parser import HTMLParser +from http.client import HTTPConnection as http +#qBt from novaprinter import prettyPrinter -from helpers import retrieve_url, download_file -import sgmllib3 -import re +from helpers import download_file class extratorrent(object): - url = 'http://extratorrent.cc' - name = 'extratorrent' - supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'} + """ Search engine class """ + url = 'http://extratorrent.cc' + name = 'ExtraTorrent' + supported_categories = {'all' : '0', + 'movies' : '4', + 'tv' : '8', + 'music' : '5', + 'games' : '3', + 'anime' : '1', + 'software' : '7', + 'books' : '2', + 'pictures' : '6'} - def __init__(self): - self.results = [] - self.parser = self.SimpleSGMLParser(self.results, self.url) + def download_torrent(self, info): + """ Downloader """ + print(download_file(info)) - def download_torrent(self, info): - print(download_file(info)) + class MyHtmlParseWithBlackJack(HTMLParser): + """ Parser class """ + def __init__(self, list_searches, url): + HTMLParser.__init__(self) + self.url = url + self.list_searches = list_searches + self.current_item = None + self.cur_item_name = None + self.pending_size = False + self.next_queries = True + self.pending_next_queries = False - class SimpleSGMLParser(sgmllib3.SGMLParser): - def __init__(self, results, url, *args): - sgmllib3.SGMLParser.__init__(self) - self.url = url - self.td_counter = None - self.current_item = None - self.start_name = False - self.results = results - - def start_a(self, attr): - params = dict(attr) - #print params - if 'href' in params and params['href'].startswith("/torrent_download/"): - self.current_item = {} - self.td_counter = 0 - self.start_name = False - torrent_id = '/'.join(params['href'].split('/')[2:]) - self.current_item['link']=self.url+'/download/'+torrent_id - elif 'href' in params and params['href'].startswith("/torrent/") and params['href'].endswith(".html"): - self.current_item['desc_link'] = self.url + params['href'].strip() - self.start_name = True - - def handle_data(self, data): - if self.td_counter == 2: - if 'name' not in self.current_item and self.start_name: - self.current_item['name'] = data.strip() - elif self.td_counter == 3: - if 'size' not in self.current_item: - self.current_item['size'] = '' - self.current_item['size']+= data.replace(" ", " ").strip() - elif self.td_counter == 4: - if 'seeds' not in self.current_item: - self.current_item['seeds'] = '' - self.current_item['seeds']+= data.strip() - elif self.td_counter == 5: - if 'leech' not in self.current_item: - self.current_item['leech'] = '' - self.current_item['leech']+= data.strip() - - def start_td(self,attr): - if isinstance(self.td_counter,int): - self.td_counter += 1 - if self.td_counter > 5: - self.td_counter = None - # Display item + def handle_starttag(self, tag, attrs): if self.current_item: - self.current_item['engine_url'] = self.url - if not self.current_item['seeds'].isdigit(): - self.current_item['seeds'] = 0 - if not self.current_item['leech'].isdigit(): - self.current_item['leech'] = 0 - prettyPrinter(self.current_item) - self.results.append('a') + if tag == "a": + params = dict(attrs) + link = params['href'] - def search(self, what, cat='all'): - ret = [] - i = 1 - while True and i<11: - results = [] - parser = self.SimpleSGMLParser(results, self.url) - dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i)) - results_re = re.compile('(?s)

.*') - for match in results_re.finditer(dat): - res_tab = match.group(0) - parser.feed(res_tab) + if not link.startswith("/torrent"): + return + + if link[8] == "/": + #description + self.current_item["desc_link"] = "".join((self.url, link)) + #remove view at the beginning + self.current_item["name"] = params["title"][5:] + self.pending_size = True + elif link[8] == "_": + #download link + link = link.replace("torrent_", "", 1) + self.current_item["link"] = "".join((self.url, link)) + + elif tag == "td": + if self.pending_size: + self.cur_item_name = "size" + self.current_item["size"] = "" + self.pending_size = False + + for attr in attrs: + if attr[0] == "class": + if attr[1][0] == "s": + self.cur_item_name = "seeds" + self.current_item["seeds"] = "" + elif attr[1][0] == "l": + self.cur_item_name = "leech" + self.current_item["leech"] = "" + break + + + elif tag == "tr": + for attr in attrs: + if attr[0] == "class" and attr[1].startswith("tl"): + self.current_item = dict() + self.current_item["engine_url"] = self.url + break + + elif self.pending_next_queries: + if tag == "a": + params = dict(attrs) + self.list_searches.append(params['href']) + if params["title"] == "10": + self.pending_next_queries = False + else: + self.pending_next_queries = False + + elif self.next_queries: + if tag == "b" and ("class", "pager_no_link") in attrs: + self.next_queries = False + self.pending_next_queries = True + + def handle_data(self, data): + if self.cur_item_name: + temp = self.current_item[self.cur_item_name] + self.current_item[self.cur_item_name] = " ".join((temp, data)) + #Due to utf-8 we need to handle data two times if there is space + if not self.cur_item_name == "size": + self.cur_item_name = None + + def handle_endtag(self, tag): + if self.current_item: + if tag == "tr": + prettyPrinter(self.current_item) + self.current_item = None + + def search(self, what, cat="all"): + """ Performs search """ + connection = http("extratorrent.cc") + + query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat])) + + connection.request("GET", query) + response = connection.getresponse() + if response.status != 200: + return + + list_searches = [] + parser = self.MyHtmlParseWithBlackJack(list_searches, self.url) + parser.feed(response.read().decode('utf-8')) parser.close() - break - if len(results) <= 0: - break - i += 1 - + + for search_query in list_searches: + connection.request("GET", search_query) + response = connection.getresponse() + parser.feed(response.read().decode('utf-8')) + parser.close() + + connection.close() + return diff --git a/src/searchengine/nova3/engines/legittorrents.py b/src/searchengine/nova3/engines/legittorrents.py index 290852f0d..60297c574 100644 --- a/src/searchengine/nova3/engines/legittorrents.py +++ b/src/searchengine/nova3/engines/legittorrents.py @@ -1,4 +1,4 @@ -#VERSION: 1.03 +#VERSION: 1.04 #AUTHORS: Christophe Dumez (chris@qbittorrent.org) # Redistribution and use in source and binary forms, with or without @@ -36,10 +36,6 @@ class legittorrents(object): name = 'legittorrents' supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'} - def __init__(self): - self.results = [] - self.parser = self.SimpleSGMLParser(self.results, self.url) - def download_torrent(self, info): print(download_file(info)) diff --git a/src/searchengine/nova3/engines/mininova.py b/src/searchengine/nova3/engines/mininova.py index 96d6ed8e5..12544db09 100644 --- a/src/searchengine/nova3/engines/mininova.py +++ b/src/searchengine/nova3/engines/mininova.py @@ -1,4 +1,4 @@ -#VERSION: 1.51 +#VERSION: 2.00 #AUTHORS: Christophe Dumez (chris@qbittorrent.org) #CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com) @@ -26,90 +26,124 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. +from html.parser import HTMLParser +from http.client import HTTPConnection as http from novaprinter import prettyPrinter -from helpers import retrieve_url, download_file -import sgmllib3 -import re +from helpers import download_file class mininova(object): - # Mandatory properties - url = 'http://www.mininova.org' - name = 'Mininova' - supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'} + """ Search engine class """ + url = 'http://www.mininova.org' + name = 'Mininova' + supported_categories = {'all' : '0', + 'movies' : '4', + 'tv' : '8', + 'music' : '5', + 'games' : '3', + 'anime' : '1', + 'software' : '7', + 'pictures' : '6', + 'books' : '2'} - def __init__(self): - self.results = [] - self.parser = self.SimpleSGMLParser(self.results, self.url) + def download_torrent(self, info): + print(download_file(info)) - def download_torrent(self, info): - print(download_file(info)) + class MyHtmlParseWithBlackJack(HTMLParser): + """ Parser class """ + def __init__(self, list_searches, url): + HTMLParser.__init__(self) + self.list_searches = list_searches + self.url = url + self.table_results = False + self.current_item = None + self.cur_item_name = None + self.next_queries = True - class SimpleSGMLParser(sgmllib3.SGMLParser): - def __init__(self, results, url, *args): - sgmllib3.SGMLParser.__init__(self) - self.url = url - self.td_counter = None - self.current_item = None - self.results = results - - def start_a(self, attr): - params = dict(attr) - #print params - if 'href' in params: - if params['href'].startswith("/get/"): - self.current_item = {} - self.td_counter = 0 - self.current_item['link']=self.url+params['href'].strip() - elif params['href'].startswith("/tor/") and self.current_item is not None: - self.current_item['desc_link']=self.url+params['href'].strip() - - def handle_data(self, data): - if self.td_counter == 0: - if 'name' not in self.current_item: - self.current_item['name'] = '' - self.current_item['name']+= data - elif self.td_counter == 1: - if 'size' not in self.current_item: - self.current_item['size'] = '' - self.current_item['size']+= data.strip() - elif self.td_counter == 2: - if 'seeds' not in self.current_item: - self.current_item['seeds'] = '' - self.current_item['seeds']+= data.strip() - elif self.td_counter == 3: - if 'leech' not in self.current_item: - self.current_item['leech'] = '' - self.current_item['leech']+= data.strip() - - def start_td(self,attr): - if isinstance(self.td_counter,int): - self.td_counter += 1 - if self.td_counter > 4: - self.td_counter = None - # Display item - if self.current_item: - self.current_item['engine_url'] = self.url - if not self.current_item['seeds'].isdigit(): - self.current_item['seeds'] = 0 - if not self.current_item['leech'].isdigit(): - self.current_item['leech'] = 0 - prettyPrinter(self.current_item) - self.results.append('a') + def handle_starttag_tr(self, _): + """ Handler of tr start tag """ + self.current_item = dict() - def search(self, what, cat='all'): - ret = [] - i = 1 - while True and i<11: - results = [] - parser = self.SimpleSGMLParser(results, self.url) - dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i)) - results_re = re.compile('(?s)

Search results for.*') - for match in results_re.finditer(dat): - res_tab = match.group(0) - parser.feed(res_tab) + def handle_starttag_a(self, attrs): + """ Handler of a start tag """ + params = dict(attrs) + link = params["href"] + + if link.startswith("/get/"): + #download link + self.current_item["link"] = "".join((self.url, link)) + elif link.startswith("/tor/"): + #description + self.current_item["desc_link"] = "".join((self.url, link)) + self.cur_item_name = "name" + self.current_item["name"] = "" + elif self.next_queries and link.startswith("/search"): + if params["title"].startswith("Page"): + self.list_searches.append(link) + + def handle_starttag_td(self, attrs): + """ Handler of td start tag """ + if ("align", "right") in attrs: + if not "size" in self.current_item.keys(): + self.cur_item_name = "size" + self.current_item["size"] = "" + + def handle_starttag_span(self, attrs): + """ Handler of span start tag """ + if ("class", "g") in attrs: + self.cur_item_name = "seeds" + self.current_item["seeds"] = "" + elif ("class", "b") in attrs: + self.cur_item_name = "leech" + self.current_item["leech"] = "" + + def handle_starttag(self, tag, attrs): + """ Parser's start tag handler """ + if self.table_results: + dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None) + if dispatcher: + dispatcher(attrs) + + elif tag == "table": + self.table_results = ("class", "maintable") in attrs + + def handle_endtag(self, tag): + """ Parser's end tag handler """ + if tag == "tr" and self.current_item: + self.current_item["engine_url"] = self.url + prettyPrinter(self.current_item) + self.current_item = None + elif self.cur_item_name: + if tag == "a" or tag == "span": + self.cur_item_name = None + + def handle_data(self, data): + """ Parser's data handler """ + if self.cur_item_name: + temp = self.current_item[self.cur_item_name] + self.current_item[self.cur_item_name] = " ".join((temp, data)) + + def search(self, what, cat="all"): + """ Performs search """ + connection = http("www.mininova.org") + + query = "/".join(("/search", what, self.supported_categories[cat], "seeds")) + + connection.request("GET", query) + response = connection.getresponse() + if response.status != 200: + return + + list_searches = [] + parser = self.MyHtmlParseWithBlackJack(list_searches, self.url) + parser.feed(response.read().decode('utf-8')) parser.close() - break - if len(results) <= 0: - break - i += 1 - + + parser.next_queries = False + for search_query in list_searches: + connection.request("GET", search_query) + response = connection.getresponse() + parser.feed(response.read().decode('utf-8')) + parser.close() + + connection.close() + return diff --git a/src/searchengine/nova3/engines/torrentreactor.py b/src/searchengine/nova3/engines/torrentreactor.py index a099ec5ab..da6391cba 100644 --- a/src/searchengine/nova3/engines/torrentreactor.py +++ b/src/searchengine/nova3/engines/torrentreactor.py @@ -1,4 +1,4 @@ -#VERSION: 1.33 +#VERSION: 1.35 #AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net) #CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org) # Bruno Barbieri (brunorex@gmail.com) @@ -28,91 +28,94 @@ # POSSIBILITY OF SUCH DAMAGE. from novaprinter import prettyPrinter -from helpers import retrieve_url, download_file -from urllib import error, parse +from helpers import download_file +from urllib import parse from html.parser import HTMLParser +from http.client import HTTPConnection as http import re class torrentreactor(object): - url = 'http://www.torrentreactor.net' - name = 'TorrentReactor.Net' - supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'} + url = 'http://www.torrentreactor.net' + name = 'TorrentReactor.Net' + supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'} - def download_torrent(self, info): - print(download_file(info)) + def download_torrent(self, info): + print(download_file(info)) - class SimpleHTMLParser(HTMLParser): - def __init__(self, results, url, *args): - HTMLParser.__init__(self) - self.td_counter = None - self.current_item = None - self.results = results - self.id = None - self.url = url - self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td } + class SimpleHTMLParser(HTMLParser): + def __init__(self, results, url, *args): + HTMLParser.__init__(self) + self.td_counter = None + self.current_item = None + self.results = results + self.id = None + self.url = url + self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td } - def handle_starttag(self, tag, attrs): - if tag in self.dispatcher: - self.dispatcher[tag](attrs) + def handle_starttag(self, tag, attrs): + if tag in self.dispatcher: + self.dispatcher[tag](attrs) - def start_a(self, attr): - params = dict(attr) - if re.match("/torrents/\d+.*", params['href']): - self.current_item = {} - self.current_item['desc_link'] = self.url+params['href'].strip() - elif 'torrentreactor.net/download.php' in params['href']: - self.td_counter = 0 - self.current_item['link'] = params['href'].strip() - self.current_item['name'] = parse.unquote_plus(params['href'].split('&')[1].split('name=')[1]) + def start_a(self, attr): + params = dict(attr) + if re.match("/torrents/\d+.*", params['href']): + self.current_item = {} + self.current_item['desc_link'] = self.url+params['href'].strip() + elif 'torrentreactor.net/download.php' in params['href']: + self.td_counter = 0 + self.current_item['link'] = params['href'].strip() + self.current_item['name'] = parse.unquote_plus(params['href'].split('&')[1].split('name=')[1]) - def handle_data(self, data): - if self.td_counter == 1: - if 'size' not in self.current_item: - self.current_item['size'] = '' - self.current_item['size']+= data.strip() - elif self.td_counter == 2: - if 'seeds' not in self.current_item: - self.current_item['seeds'] = '' - self.current_item['seeds']+= data.strip() - elif self.td_counter == 3: - if 'leech' not in self.current_item: - self.current_item['leech'] = '' - self.current_item['leech']+= data.strip() + def handle_data(self, data): + if self.td_counter == 1: + if 'size' not in self.current_item: + self.current_item['size'] = '' + self.current_item['size']+= data.strip() + elif self.td_counter == 2: + if 'seeds' not in self.current_item: + self.current_item['seeds'] = '' + self.current_item['seeds']+= data.strip() + elif self.td_counter == 3: + if 'leech' not in self.current_item: + self.current_item['leech'] = '' + self.current_item['leech']+= data.strip() - def start_td(self,attr): - if isinstance(self.td_counter,int): - self.td_counter += 1 - if self.td_counter > 3: - self.td_counter = None - # add item to results - if self.current_item: - self.current_item['engine_url'] = self.url - if not self.current_item['seeds'].isdigit(): - self.current_item['seeds'] = 0 - if not self.current_item['leech'].isdigit(): - self.current_item['leech'] = 0 - prettyPrinter(self.current_item) - self.has_results = True - self.results.append('a') + def start_td(self,attr): + if isinstance(self.td_counter,int): + self.td_counter += 1 + if self.td_counter > 3: + self.td_counter = None + # add item to results + if self.current_item: + self.current_item['engine_url'] = self.url + if not self.current_item['seeds'].isdigit(): + self.current_item['seeds'] = 0 + if not self.current_item['leech'].isdigit(): + self.current_item['leech'] = 0 + prettyPrinter(self.current_item) + self.has_results = True + self.results.append('a') - def __init__(self): - self.results = [] - self.parser = self.SimpleHTMLParser(self.results, self.url) + def search(self, what, cat='all'): + i = 0 + dat = '' + connection = http("www.torrentreactor.net") - def search(self, what, cat='all'): - i = 0 - dat = '' - while True and i<11: - results = [] - parser = self.SimpleHTMLParser(results, self.url) + while True and i<11: + results = [] + parser = self.SimpleHTMLParser(results, self.url) + query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]) + connection.request("GET", query) + response = connection.getresponse() + if response.status != 200: + break - try: - dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])) - except error.HTTPError: - break + dat = response.read().decode('utf-8') - parser.feed(dat) - parser.close() - if len(results) <= 0: - break - i += 1 + parser.feed(dat) + parser.close() + if len(results) <= 0: + break + i += 1 + + connection.close() diff --git a/src/searchengine/nova3/engines/versions.txt b/src/searchengine/nova3/engines/versions.txt index c0e097583..739171208 100644 --- a/src/searchengine/nova3/engines/versions.txt +++ b/src/searchengine/nova3/engines/versions.txt @@ -1,8 +1,9 @@ -torrentreactor: 1.33 -mininova: 1.51 -piratebay: 2.11 extratorrent: 1.2 +torrentreactor: 1.35 +mininova: 2.00 +piratebay: 2.11 +extratorrent: 2.0 kickasstorrents: 1.26 btdigg: 1.23 -legittorrents: 1.03 torrentz: 2.13 +legittorrents: 1.04 From 2fc14876037d8fca34c23104cf20a600f0c5cc5b Mon Sep 17 00:00:00 2001 From: DoumanAsh Date: Mon, 6 Apr 2015 08:36:58 +0300 Subject: [PATCH 3/5] [search engine] re-factoring of code --- src/searchengine/nova/nova2dl.py | 44 +- src/searchengine/nova/novaprinter.py | 67 ++- src/searchengine/nova/socks.py | 652 +++++++++++++------------- src/searchengine/nova3/nova2dl.py | 44 +- src/searchengine/nova3/novaprinter.py | 66 ++- src/searchengine/nova3/socks.py | 652 +++++++++++++------------- 6 files changed, 761 insertions(+), 764 deletions(-) diff --git a/src/searchengine/nova/nova2dl.py b/src/searchengine/nova/nova2dl.py index e0c2c2cd6..0d97c0154 100644 --- a/src/searchengine/nova/nova2dl.py +++ b/src/searchengine/nova/nova2dl.py @@ -25,7 +25,7 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -#VERSION: 1.10 +#VERSION: 1.20 # Author: # Christophe DUMEZ (chris@qbittorrent.org) @@ -39,26 +39,26 @@ supported_engines = dict() engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) for engine in engines: - e = engine.split(os.sep)[-1][:-3] - if len(e.strip()) == 0: continue - if e.startswith('_'): continue - try: - exec "from engines.%s import %s"%(e,e) - exec "engine_url = %s.url"%e - supported_engines[engine_url] = e - except: - pass + e = engine.split(os.sep)[-1][:-3] + if len(e.strip()) == 0: continue + if e.startswith('_'): continue + try: + exec("from engines.%s import %s"%(e,e)) + exec("engine_url = %s.url"%e) + supported_engines[engine_url] = e + except: + pass if __name__ == '__main__': - if len(sys.argv) < 3: - raise SystemExit('./nova2dl.py engine_url download_parameter') - engine_url = sys.argv[1].strip() - download_param = sys.argv[2].strip() - if engine_url not in supported_engines.keys(): - raise SystemExit('./nova2dl.py: this engine_url was not recognized') - exec "engine = %s()"%supported_engines[engine_url] - if hasattr(engine, 'download_torrent'): - engine.download_torrent(download_param) - else: - print download_file(download_param) - sys.exit(0) \ No newline at end of file + if len(sys.argv) < 3: + raise SystemExit('./nova2dl.py engine_url download_parameter') + engine_url = sys.argv[1].strip() + download_param = sys.argv[2].strip() + if engine_url not in list(supported_engines.keys()): + raise SystemExit('./nova2dl.py: this engine_url was not recognized') + exec("engine = %s()"%supported_engines[engine_url]) + if hasattr(engine, 'download_torrent'): + engine.download_torrent(download_param) + else: + print(download_file(download_param)) + sys.exit(0) diff --git a/src/searchengine/nova/novaprinter.py b/src/searchengine/nova/novaprinter.py index 848f932fa..fc16949e6 100644 --- a/src/searchengine/nova/novaprinter.py +++ b/src/searchengine/nova/novaprinter.py @@ -25,45 +25,44 @@ # POSSIBILITY OF SUCH DAMAGE. import sys, codecs +from io import open # Force UTF-8 printing sys.stdout = codecs.getwriter('utf-8')(sys.stdout) def prettyPrinter(dictionary): - # Convert everything to unicode for safe printing - for key,value in dictionary.items(): - if isinstance(dictionary[key], str): - dictionary[key] = unicode(dictionary[key], 'utf-8') - dictionary['size'] = anySizeToBytes(dictionary['size']) - if dictionary.has_key('desc_link'): - print u"%s|%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link']) - else: - print u"%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url']) + dictionary['size'] = anySizeToBytes(dictionary['size']) + outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"])) + if 'desc_link' in dictionary: + outtext = "|".join((outtext, dictionary["desc_link"])) + + with open(1, 'w', encoding='utf-8', closefd=False) as utf8_stdout: + utf8_stdout.write("".join((outtext, "\n"))) def anySizeToBytes(size_string): - """ - Convert a string like '1 KB' to '1024' (bytes) - """ - # separate integer from unit - try: - size, unit = size_string.split() - except: - try: - size = size_string.strip() - unit = ''.join([c for c in size if c.isalpha()]) - if len(unit) > 0: - size = size[:-len(unit)] - except: - return -1 - if len(size) == 0: - return -1 - size = float(size) - if len(unit) == 0: - return int(size) - short_unit = unit.upper()[0] + """ + Convert a string like '1 KB' to '1024' (bytes) + """ + # separate integer from unit + try: + size, unit = size_string.split() + except: + try: + size = size_string.strip() + unit = ''.join([c for c in size if c.isalpha()]) + if len(unit) > 0: + size = size[:-len(unit)] + except: + return -1 + if len(size) == 0: + return -1 + size = float(size) + if len(unit) == 0: + return int(size) + short_unit = unit.upper()[0] - # convert - units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 } - if units_dict.has_key( short_unit ): - size = size * 2**units_dict[short_unit] - return int(size) + # convert + units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10} + if units_dict.has_key(short_unit): + size = size * 2**units_dict[short_unit] + return int(size) diff --git a/src/searchengine/nova/socks.py b/src/searchengine/nova/socks.py index 0033993d5..844e8fe9d 100644 --- a/src/searchengine/nova/socks.py +++ b/src/searchengine/nova/socks.py @@ -42,350 +42,350 @@ _defaultproxy = None _orgsocket = socket.socket class ProxyError(Exception): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class GeneralProxyError(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class Socks5AuthError(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class Socks5Error(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class Socks4Error(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class HTTPError(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) _generalerrors = ("success", - "invalid data", - "not connected", - "not available", - "bad proxy type", - "bad input") + "invalid data", + "not connected", + "not available", + "bad proxy type", + "bad input") _socks5errors = ("succeeded", - "general SOCKS server failure", - "connection not allowed by ruleset", - "Network unreachable", - "Host unreachable", - "Connection refused", - "TTL expired", - "Command not supported", - "Address type not supported", - "Unknown error") + "general SOCKS server failure", + "connection not allowed by ruleset", + "Network unreachable", + "Host unreachable", + "Connection refused", + "TTL expired", + "Command not supported", + "Address type not supported", + "Unknown error") _socks5autherrors = ("succeeded", - "authentication is required", - "all offered authentication methods were rejected", - "unknown username or invalid password", - "unknown error") + "authentication is required", + "all offered authentication methods were rejected", + "unknown username or invalid password", + "unknown error") _socks4errors = ("request granted", - "request rejected or failed", - "request rejected because SOCKS server cannot connect to identd on the client", - "request rejected because the client program and identd report different user-ids", - "unknown error") + "request rejected or failed", + "request rejected because SOCKS server cannot connect to identd on the client", + "request rejected because the client program and identd report different user-ids", + "unknown error") def setdefaultproxy(proxytype=None,addr=None,port=None,rdns=True,username=None,password=None): - """setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) - Sets a default proxy which all further socksocket objects will use, - unless explicitly changed. - """ - global _defaultproxy - _defaultproxy = (proxytype,addr,port,rdns,username,password) - + """setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) + Sets a default proxy which all further socksocket objects will use, + unless explicitly changed. + """ + global _defaultproxy + _defaultproxy = (proxytype,addr,port,rdns,username,password) + class socksocket(socket.socket): - """socksocket([family[, type[, proto]]]) -> socket object - - Open a SOCKS enabled socket. The parameters are the same as - those of the standard socket init. In order for SOCKS to work, - you must specify family=AF_INET, type=SOCK_STREAM and proto=0. - """ - - def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None): - _orgsocket.__init__(self,family,type,proto,_sock) - if _defaultproxy != None: - self.__proxy = _defaultproxy - else: - self.__proxy = (None, None, None, None, None, None) - self.__proxysockname = None - self.__proxypeername = None - - def __recvall(self, bytes): - """__recvall(bytes) -> data - Receive EXACTLY the number of bytes requested from the socket. - Blocks until the required number of bytes have been received. - """ - data = "" - while len(data) < bytes: - d = self.recv(bytes-len(data)) - if not d: - raise GeneralProxyError("connection closed unexpectedly") - data = data + d - return data - - def setproxy(self,proxytype=None,addr=None,port=None,rdns=True,username=None,password=None): - """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) - Sets the proxy to be used. - proxytype - The type of the proxy to be used. Three types - are supported: PROXY_TYPE_SOCKS4 (including socks4a), - PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP - addr - The address of the server (IP or DNS). - port - The port of the server. Defaults to 1080 for SOCKS - servers and 8080 for HTTP proxy servers. - rdns - Should DNS queries be preformed on the remote side - (rather than the local side). The default is True. - Note: This has no effect with SOCKS4 servers. - username - Username to authenticate with to the server. - The default is no authentication. - password - Password to authenticate with to the server. - Only relevant when username is also provided. - """ - self.__proxy = (proxytype,addr,port,rdns,username,password) - - def __negotiatesocks5(self,destaddr,destport): - """__negotiatesocks5(self,destaddr,destport) - Negotiates a connection through a SOCKS5 server. - """ - # First we'll send the authentication packages we support. - if (self.__proxy[4]!=None) and (self.__proxy[5]!=None): - # The username/password details were supplied to the - # setproxy method so we support the USERNAME/PASSWORD - # authentication (in addition to the standard none). - self.sendall("\x05\x02\x00\x02") - else: - # No username/password were entered, therefore we - # only support connections with no authentication. - self.sendall("\x05\x01\x00") - # We'll receive the server's response to determine which - # method was selected - chosenauth = self.__recvall(2) - if chosenauth[0] != "\x05": - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - # Check the chosen authentication method - if chosenauth[1] == "\x00": - # No authentication is required - pass - elif chosenauth[1] == "\x02": - # Okay, we need to perform a basic username/password - # authentication. - self.sendall("\x01" + chr(len(self.__proxy[4])) + self.__proxy[4] + chr(len(self.__proxy[5])) + self.__proxy[5]) - authstat = self.__recvall(2) - if authstat[0] != "\x01": - # Bad response - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - if authstat[1] != "\x00": - # Authentication failed - self.close() - raise Socks5AuthError,((3,_socks5autherrors[3])) - # Authentication succeeded - else: - # Reaching here is always bad - self.close() - if chosenauth[1] == "\xFF": - raise Socks5AuthError((2,_socks5autherrors[2])) - else: - raise GeneralProxyError((1,_generalerrors[1])) - # Now we can request the actual connection - req = "\x05\x01\x00" - # If the given destination address is an IP address, we'll - # use the IPv4 address request even if remote resolving was specified. - try: - ipaddr = socket.inet_aton(destaddr) - req = req + "\x01" + ipaddr - except socket.error: - # Well it's not an IP number, so it's probably a DNS name. - if self.__proxy[3]==True: - # Resolve remotely - ipaddr = None - req = req + "\x03" + chr(len(destaddr)) + destaddr - else: - # Resolve locally - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) - req = req + "\x01" + ipaddr - req = req + struct.pack(">H",destport) - self.sendall(req) - # Get the response - resp = self.__recvall(4) - if resp[0] != "\x05": - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - elif resp[1] != "\x00": - # Connection failed - self.close() - if ord(resp[1])<=8: - raise Socks5Error((ord(resp[1]),_generalerrors[ord(resp[1])])) - else: - raise Socks5Error((9,_generalerrors[9])) - # Get the bound address/port - elif resp[3] == "\x01": - boundaddr = self.__recvall(4) - elif resp[3] == "\x03": - resp = resp + self.recv(1) - boundaddr = self.__recvall(ord(resp[4])) - else: - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - boundport = struct.unpack(">H",self.__recvall(2))[0] - self.__proxysockname = (boundaddr,boundport) - if ipaddr != None: - self.__proxypeername = (socket.inet_ntoa(ipaddr),destport) - else: - self.__proxypeername = (destaddr,destport) - - def getproxysockname(self): - """getsockname() -> address info - Returns the bound IP address and port number at the proxy. - """ - return self.__proxysockname - - def getproxypeername(self): - """getproxypeername() -> address info - Returns the IP and port number of the proxy. - """ - return _orgsocket.getpeername(self) - - def getpeername(self): - """getpeername() -> address info - Returns the IP address and port number of the destination - machine (note: getproxypeername returns the proxy) - """ - return self.__proxypeername - - def __negotiatesocks4(self,destaddr,destport): - """__negotiatesocks4(self,destaddr,destport) - Negotiates a connection through a SOCKS4 server. - """ - # Check if the destination address provided is an IP address - rmtrslv = False - try: - ipaddr = socket.inet_aton(destaddr) - except socket.error: - # It's a DNS name. Check where it should be resolved. - if self.__proxy[3]==True: - ipaddr = "\x00\x00\x00\x01" - rmtrslv = True - else: - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) - # Construct the request packet - req = "\x04\x01" + struct.pack(">H",destport) + ipaddr - # The username parameter is considered userid for SOCKS4 - if self.__proxy[4] != None: - req = req + self.__proxy[4] - req = req + "\x00" - # DNS name if remote resolving is required - # NOTE: This is actually an extension to the SOCKS4 protocol - # called SOCKS4A and may not be supported in all cases. - if rmtrslv==True: - req = req + destaddr + "\x00" - self.sendall(req) - # Get the response from the server - resp = self.__recvall(8) - if resp[0] != "\x00": - # Bad data - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - if resp[1] != "\x5A": - # Server returned an error - self.close() - if ord(resp[1]) in (91,92,93): - self.close() - raise Socks4Error((ord(resp[1]),_socks4errors[ord(resp[1])-90])) - else: - raise Socks4Error((94,_socks4errors[4])) - # Get the bound address/port - self.__proxysockname = (socket.inet_ntoa(resp[4:]),struct.unpack(">H",resp[2:4])[0]) - if rmtrslv != None: - self.__proxypeername = (socket.inet_ntoa(ipaddr),destport) - else: - self.__proxypeername = (destaddr,destport) - - def __negotiatehttp(self,destaddr,destport): - """__negotiatehttp(self,destaddr,destport) - Negotiates a connection through an HTTP server. - """ - # If we need to resolve locally, we do this now - if self.__proxy[3] == False: - addr = socket.gethostbyname(destaddr) - else: - addr = destaddr - self.sendall("CONNECT " + addr + ":" + str(destport) + " HTTP/1.1\r\n" + "Host: " + destaddr + "\r\n\r\n") - # We read the response until we get the string "\r\n\r\n" - resp = self.recv(1) - while resp.find("\r\n\r\n")==-1: - resp = resp + self.recv(1) - # We just need the first line to check if the connection - # was successful - statusline = resp.splitlines()[0].split(" ",2) - if statusline[0] not in ("HTTP/1.0","HTTP/1.1"): - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - try: - statuscode = int(statusline[1]) - except ValueError: - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - if statuscode != 200: - self.close() - raise HTTPError((statuscode,statusline[2])) - self.__proxysockname = ("0.0.0.0",0) - self.__proxypeername = (addr,destport) - - def connect(self,destpair): - """connect(self,despair) - Connects to the specified destination through a proxy. - destpar - A tuple of the IP/DNS address and the port number. - (identical to socket's connect). - To select the proxy server use setproxy(). - """ - # Do a minimal input check first - if (type(destpair) in (list,tuple)==False) or (len(destpair)<2) or (type(destpair[0])!=str) or (type(destpair[1])!=int): - raise GeneralProxyError((5,_generalerrors[5])) - if self.__proxy[0] == PROXY_TYPE_SOCKS5: - if self.__proxy[2] != None: - portnum = self.__proxy[2] - else: - portnum = 1080 - _orgsocket.connect(self,(self.__proxy[1],portnum)) - self.__negotiatesocks5(destpair[0],destpair[1]) - elif self.__proxy[0] == PROXY_TYPE_SOCKS4: - if self.__proxy[2] != None: - portnum = self.__proxy[2] - else: - portnum = 1080 - _orgsocket.connect(self,(self.__proxy[1],portnum)) - self.__negotiatesocks4(destpair[0],destpair[1]) - elif self.__proxy[0] == PROXY_TYPE_HTTP: - if self.__proxy[2] != None: - portnum = self.__proxy[2] - else: - portnum = 8080 - _orgsocket.connect(self,(self.__proxy[1],portnum)) - self.__negotiatehttp(destpair[0],destpair[1]) - elif self.__proxy[0] == None: - _orgsocket.connect(self,(destpair[0],destpair[1])) - else: - raise GeneralProxyError((4,_generalerrors[4])) + """socksocket([family[, type[, proto]]]) -> socket object + + Open a SOCKS enabled socket. The parameters are the same as + those of the standard socket init. In order for SOCKS to work, + you must specify family=AF_INET, type=SOCK_STREAM and proto=0. + """ + + def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None): + _orgsocket.__init__(self,family,type,proto,_sock) + if _defaultproxy != None: + self.__proxy = _defaultproxy + else: + self.__proxy = (None, None, None, None, None, None) + self.__proxysockname = None + self.__proxypeername = None + + def __recvall(self, bytes): + """__recvall(bytes) -> data + Receive EXACTLY the number of bytes requested from the socket. + Blocks until the required number of bytes have been received. + """ + data = "" + while len(data) < bytes: + d = self.recv(bytes-len(data)) + if not d: + raise GeneralProxyError("connection closed unexpectedly") + data = data + d + return data + + def setproxy(self,proxytype=None,addr=None,port=None,rdns=True,username=None,password=None): + """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) + Sets the proxy to be used. + proxytype - The type of the proxy to be used. Three types + are supported: PROXY_TYPE_SOCKS4 (including socks4a), + PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP + addr - The address of the server (IP or DNS). + port - The port of the server. Defaults to 1080 for SOCKS + servers and 8080 for HTTP proxy servers. + rdns - Should DNS queries be preformed on the remote side + (rather than the local side). The default is True. + Note: This has no effect with SOCKS4 servers. + username - Username to authenticate with to the server. + The default is no authentication. + password - Password to authenticate with to the server. + Only relevant when username is also provided. + """ + self.__proxy = (proxytype,addr,port,rdns,username,password) + + def __negotiatesocks5(self,destaddr,destport): + """__negotiatesocks5(self,destaddr,destport) + Negotiates a connection through a SOCKS5 server. + """ + # First we'll send the authentication packages we support. + if (self.__proxy[4]!=None) and (self.__proxy[5]!=None): + # The username/password details were supplied to the + # setproxy method so we support the USERNAME/PASSWORD + # authentication (in addition to the standard none). + self.sendall("\x05\x02\x00\x02") + else: + # No username/password were entered, therefore we + # only support connections with no authentication. + self.sendall("\x05\x01\x00") + # We'll receive the server's response to determine which + # method was selected + chosenauth = self.__recvall(2) + if chosenauth[0] != "\x05": + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + # Check the chosen authentication method + if chosenauth[1] == "\x00": + # No authentication is required + pass + elif chosenauth[1] == "\x02": + # Okay, we need to perform a basic username/password + # authentication. + self.sendall("\x01" + chr(len(self.__proxy[4])) + self.__proxy[4] + chr(len(self.__proxy[5])) + self.__proxy[5]) + authstat = self.__recvall(2) + if authstat[0] != "\x01": + # Bad response + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + if authstat[1] != "\x00": + # Authentication failed + self.close() + raise Socks5AuthError,((3,_socks5autherrors[3])) + # Authentication succeeded + else: + # Reaching here is always bad + self.close() + if chosenauth[1] == "\xFF": + raise Socks5AuthError((2,_socks5autherrors[2])) + else: + raise GeneralProxyError((1,_generalerrors[1])) + # Now we can request the actual connection + req = "\x05\x01\x00" + # If the given destination address is an IP address, we'll + # use the IPv4 address request even if remote resolving was specified. + try: + ipaddr = socket.inet_aton(destaddr) + req = req + "\x01" + ipaddr + except socket.error: + # Well it's not an IP number, so it's probably a DNS name. + if self.__proxy[3]==True: + # Resolve remotely + ipaddr = None + req = req + "\x03" + chr(len(destaddr)) + destaddr + else: + # Resolve locally + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + req = req + "\x01" + ipaddr + req = req + struct.pack(">H",destport) + self.sendall(req) + # Get the response + resp = self.__recvall(4) + if resp[0] != "\x05": + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + elif resp[1] != "\x00": + # Connection failed + self.close() + if ord(resp[1])<=8: + raise Socks5Error((ord(resp[1]),_generalerrors[ord(resp[1])])) + else: + raise Socks5Error((9,_generalerrors[9])) + # Get the bound address/port + elif resp[3] == "\x01": + boundaddr = self.__recvall(4) + elif resp[3] == "\x03": + resp = resp + self.recv(1) + boundaddr = self.__recvall(ord(resp[4])) + else: + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + boundport = struct.unpack(">H",self.__recvall(2))[0] + self.__proxysockname = (boundaddr,boundport) + if ipaddr != None: + self.__proxypeername = (socket.inet_ntoa(ipaddr),destport) + else: + self.__proxypeername = (destaddr,destport) + + def getproxysockname(self): + """getsockname() -> address info + Returns the bound IP address and port number at the proxy. + """ + return self.__proxysockname + + def getproxypeername(self): + """getproxypeername() -> address info + Returns the IP and port number of the proxy. + """ + return _orgsocket.getpeername(self) + + def getpeername(self): + """getpeername() -> address info + Returns the IP address and port number of the destination + machine (note: getproxypeername returns the proxy) + """ + return self.__proxypeername + + def __negotiatesocks4(self,destaddr,destport): + """__negotiatesocks4(self,destaddr,destport) + Negotiates a connection through a SOCKS4 server. + """ + # Check if the destination address provided is an IP address + rmtrslv = False + try: + ipaddr = socket.inet_aton(destaddr) + except socket.error: + # It's a DNS name. Check where it should be resolved. + if self.__proxy[3]==True: + ipaddr = "\x00\x00\x00\x01" + rmtrslv = True + else: + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + # Construct the request packet + req = "\x04\x01" + struct.pack(">H",destport) + ipaddr + # The username parameter is considered userid for SOCKS4 + if self.__proxy[4] != None: + req = req + self.__proxy[4] + req = req + "\x00" + # DNS name if remote resolving is required + # NOTE: This is actually an extension to the SOCKS4 protocol + # called SOCKS4A and may not be supported in all cases. + if rmtrslv==True: + req = req + destaddr + "\x00" + self.sendall(req) + # Get the response from the server + resp = self.__recvall(8) + if resp[0] != "\x00": + # Bad data + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + if resp[1] != "\x5A": + # Server returned an error + self.close() + if ord(resp[1]) in (91,92,93): + self.close() + raise Socks4Error((ord(resp[1]),_socks4errors[ord(resp[1])-90])) + else: + raise Socks4Error((94,_socks4errors[4])) + # Get the bound address/port + self.__proxysockname = (socket.inet_ntoa(resp[4:]),struct.unpack(">H",resp[2:4])[0]) + if rmtrslv != None: + self.__proxypeername = (socket.inet_ntoa(ipaddr),destport) + else: + self.__proxypeername = (destaddr,destport) + + def __negotiatehttp(self,destaddr,destport): + """__negotiatehttp(self,destaddr,destport) + Negotiates a connection through an HTTP server. + """ + # If we need to resolve locally, we do this now + if self.__proxy[3] == False: + addr = socket.gethostbyname(destaddr) + else: + addr = destaddr + self.sendall("CONNECT " + addr + ":" + str(destport) + " HTTP/1.1\r\n" + "Host: " + destaddr + "\r\n\r\n") + # We read the response until we get the string "\r\n\r\n" + resp = self.recv(1) + while resp.find("\r\n\r\n")==-1: + resp = resp + self.recv(1) + # We just need the first line to check if the connection + # was successful + statusline = resp.splitlines()[0].split(" ",2) + if statusline[0] not in ("HTTP/1.0","HTTP/1.1"): + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + try: + statuscode = int(statusline[1]) + except ValueError: + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + if statuscode != 200: + self.close() + raise HTTPError((statuscode,statusline[2])) + self.__proxysockname = ("0.0.0.0",0) + self.__proxypeername = (addr,destport) + + def connect(self,destpair): + """connect(self,despair) + Connects to the specified destination through a proxy. + destpar - A tuple of the IP/DNS address and the port number. + (identical to socket's connect). + To select the proxy server use setproxy(). + """ + # Do a minimal input check first + if (type(destpair) in (list,tuple)==False) or (len(destpair)<2) or (type(destpair[0])!=str) or (type(destpair[1])!=int): + raise GeneralProxyError((5,_generalerrors[5])) + if self.__proxy[0] == PROXY_TYPE_SOCKS5: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 1080 + _orgsocket.connect(self,(self.__proxy[1],portnum)) + self.__negotiatesocks5(destpair[0],destpair[1]) + elif self.__proxy[0] == PROXY_TYPE_SOCKS4: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 1080 + _orgsocket.connect(self,(self.__proxy[1],portnum)) + self.__negotiatesocks4(destpair[0],destpair[1]) + elif self.__proxy[0] == PROXY_TYPE_HTTP: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 8080 + _orgsocket.connect(self,(self.__proxy[1],portnum)) + self.__negotiatehttp(destpair[0],destpair[1]) + elif self.__proxy[0] == None: + _orgsocket.connect(self,(destpair[0],destpair[1])) + else: + raise GeneralProxyError((4,_generalerrors[4])) diff --git a/src/searchengine/nova3/nova2dl.py b/src/searchengine/nova3/nova2dl.py index eb64c7e03..0d97c0154 100644 --- a/src/searchengine/nova3/nova2dl.py +++ b/src/searchengine/nova3/nova2dl.py @@ -25,7 +25,7 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -#VERSION: 1.10 +#VERSION: 1.20 # Author: # Christophe DUMEZ (chris@qbittorrent.org) @@ -39,26 +39,26 @@ supported_engines = dict() engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py')) for engine in engines: - e = engine.split(os.sep)[-1][:-3] - if len(e.strip()) == 0: continue - if e.startswith('_'): continue - try: - exec("from engines.%s import %s"%(e,e)) - exec("engine_url = %s.url"%e) - supported_engines[engine_url] = e - except: - pass + e = engine.split(os.sep)[-1][:-3] + if len(e.strip()) == 0: continue + if e.startswith('_'): continue + try: + exec("from engines.%s import %s"%(e,e)) + exec("engine_url = %s.url"%e) + supported_engines[engine_url] = e + except: + pass if __name__ == '__main__': - if len(sys.argv) < 3: - raise SystemExit('./nova2dl.py engine_url download_parameter') - engine_url = sys.argv[1].strip() - download_param = sys.argv[2].strip() - if engine_url not in list(supported_engines.keys()): - raise SystemExit('./nova2dl.py: this engine_url was not recognized') - exec("engine = %s()"%supported_engines[engine_url]) - if hasattr(engine, 'download_torrent'): - engine.download_torrent(download_param) - else: - print(download_file(download_param)) - sys.exit(0) + if len(sys.argv) < 3: + raise SystemExit('./nova2dl.py engine_url download_parameter') + engine_url = sys.argv[1].strip() + download_param = sys.argv[2].strip() + if engine_url not in list(supported_engines.keys()): + raise SystemExit('./nova2dl.py: this engine_url was not recognized') + exec("engine = %s()"%supported_engines[engine_url]) + if hasattr(engine, 'download_torrent'): + engine.download_torrent(download_param) + else: + print(download_file(download_param)) + sys.exit(0) diff --git a/src/searchengine/nova3/novaprinter.py b/src/searchengine/nova3/novaprinter.py index c90008868..3e3391a97 100644 --- a/src/searchengine/nova3/novaprinter.py +++ b/src/searchengine/nova3/novaprinter.py @@ -26,41 +26,39 @@ def prettyPrinter(dictionary): - outtext = '' - dictionary['size'] = anySizeToBytes(dictionary['size']) - if 'desc_link' in dictionary: - outtext = '%s|%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link']) - else: - outtext = '%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url']) + dictionary['size'] = anySizeToBytes(dictionary['size']) + outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"])) + if 'desc_link' in dictionary: + outtext = "|".join((outtext, dictionary["desc_link"])) - # fd 1 is stdout - with open(1, 'w', encoding='utf-8', closefd=False) as utf8stdout: - print(outtext, file=utf8stdout) + # fd 1 is stdout + with open(1, 'w', encoding='utf-8', closefd=False) as utf8stdout: + print(outtext, file=utf8stdout) def anySizeToBytes(size_string): - """ - Convert a string like '1 KB' to '1024' (bytes) - """ - # separate integer from unit - try: - size, unit = size_string.split() - except: - try: - size = size_string.strip() - unit = ''.join([c for c in size if c.isalpha()]) - if len(unit) > 0: - size = size[:-len(unit)] - except: - return -1 - if len(size) == 0: - return -1 - size = float(size) - if len(unit) == 0: - return int(size) - short_unit = unit.upper()[0] + """ + Convert a string like '1 KB' to '1024' (bytes) + """ + # separate integer from unit + try: + size, unit = size_string.split() + except: + try: + size = size_string.strip() + unit = ''.join([c for c in size if c.isalpha()]) + if len(unit) > 0: + size = size[:-len(unit)] + except: + return -1 + if len(size) == 0: + return -1 + size = float(size) + if len(unit) == 0: + return int(size) + short_unit = unit.upper()[0] - # convert - units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 } - if short_unit in units_dict: - size = size * 2**units_dict[short_unit] - return int(size) + # convert + units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10} + if short_unit in units_dict: + size = size * 2**units_dict[short_unit] + return int(size) diff --git a/src/searchengine/nova3/socks.py b/src/searchengine/nova3/socks.py index d6b61fd97..b1b678f89 100644 --- a/src/searchengine/nova3/socks.py +++ b/src/searchengine/nova3/socks.py @@ -42,350 +42,350 @@ _defaultproxy = None _orgsocket = socket.socket class ProxyError(Exception): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class GeneralProxyError(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class Socks5AuthError(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class Socks5Error(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class Socks4Error(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) class HTTPError(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) _generalerrors = ("success", - "invalid data", - "not connected", - "not available", - "bad proxy type", - "bad input") + "invalid data", + "not connected", + "not available", + "bad proxy type", + "bad input") _socks5errors = ("succeeded", - "general SOCKS server failure", - "connection not allowed by ruleset", - "Network unreachable", - "Host unreachable", - "Connection refused", - "TTL expired", - "Command not supported", - "Address type not supported", - "Unknown error") + "general SOCKS server failure", + "connection not allowed by ruleset", + "Network unreachable", + "Host unreachable", + "Connection refused", + "TTL expired", + "Command not supported", + "Address type not supported", + "Unknown error") _socks5autherrors = ("succeeded", - "authentication is required", - "all offered authentication methods were rejected", - "unknown username or invalid password", - "unknown error") + "authentication is required", + "all offered authentication methods were rejected", + "unknown username or invalid password", + "unknown error") _socks4errors = ("request granted", - "request rejected or failed", - "request rejected because SOCKS server cannot connect to identd on the client", - "request rejected because the client program and identd report different user-ids", - "unknown error") + "request rejected or failed", + "request rejected because SOCKS server cannot connect to identd on the client", + "request rejected because the client program and identd report different user-ids", + "unknown error") def setdefaultproxy(proxytype=None,addr=None,port=None,rdns=True,username=None,password=None): - """setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) - Sets a default proxy which all further socksocket objects will use, - unless explicitly changed. - """ - global _defaultproxy - _defaultproxy = (proxytype,addr,port,rdns,username,password) - + """setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) + Sets a default proxy which all further socksocket objects will use, + unless explicitly changed. + """ + global _defaultproxy + _defaultproxy = (proxytype,addr,port,rdns,username,password) + class socksocket(socket.socket): - """socksocket([family[, type[, proto]]]) -> socket object - - Open a SOCKS enabled socket. The parameters are the same as - those of the standard socket init. In order for SOCKS to work, - you must specify family=AF_INET, type=SOCK_STREAM and proto=0. - """ - - def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None): - _orgsocket.__init__(self,family,type,proto,_sock) - if _defaultproxy != None: - self.__proxy = _defaultproxy - else: - self.__proxy = (None, None, None, None, None, None) - self.__proxysockname = None - self.__proxypeername = None - - def __recvall(self, bytes): - """__recvall(bytes) -> data - Receive EXACTLY the number of bytes requested from the socket. - Blocks until the required number of bytes have been received. - """ - data = "" - while len(data) < bytes: - d = self.recv(bytes-len(data)) - if not d: - raise GeneralProxyError("connection closed unexpectedly") - data = data + d - return data - - def setproxy(self,proxytype=None,addr=None,port=None,rdns=True,username=None,password=None): - """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) - Sets the proxy to be used. - proxytype - The type of the proxy to be used. Three types - are supported: PROXY_TYPE_SOCKS4 (including socks4a), - PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP - addr - The address of the server (IP or DNS). - port - The port of the server. Defaults to 1080 for SOCKS - servers and 8080 for HTTP proxy servers. - rdns - Should DNS queries be preformed on the remote side - (rather than the local side). The default is True. - Note: This has no effect with SOCKS4 servers. - username - Username to authenticate with to the server. - The default is no authentication. - password - Password to authenticate with to the server. - Only relevant when username is also provided. - """ - self.__proxy = (proxytype,addr,port,rdns,username,password) - - def __negotiatesocks5(self,destaddr,destport): - """__negotiatesocks5(self,destaddr,destport) - Negotiates a connection through a SOCKS5 server. - """ - # First we'll send the authentication packages we support. - if (self.__proxy[4]!=None) and (self.__proxy[5]!=None): - # The username/password details were supplied to the - # setproxy method so we support the USERNAME/PASSWORD - # authentication (in addition to the standard none). - self.sendall("\x05\x02\x00\x02") - else: - # No username/password were entered, therefore we - # only support connections with no authentication. - self.sendall("\x05\x01\x00") - # We'll receive the server's response to determine which - # method was selected - chosenauth = self.__recvall(2) - if chosenauth[0] != "\x05": - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - # Check the chosen authentication method - if chosenauth[1] == "\x00": - # No authentication is required - pass - elif chosenauth[1] == "\x02": - # Okay, we need to perform a basic username/password - # authentication. - self.sendall("\x01" + chr(len(self.__proxy[4])) + self.__proxy[4] + chr(len(self.__proxy[5])) + self.__proxy[5]) - authstat = self.__recvall(2) - if authstat[0] != "\x01": - # Bad response - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - if authstat[1] != "\x00": - # Authentication failed - self.close() - raise Socks5AuthError((3,_socks5autherrors[3])) - # Authentication succeeded - else: - # Reaching here is always bad - self.close() - if chosenauth[1] == "\xFF": - raise Socks5AuthError((2,_socks5autherrors[2])) - else: - raise GeneralProxyError((1,_generalerrors[1])) - # Now we can request the actual connection - req = "\x05\x01\x00" - # If the given destination address is an IP address, we'll - # use the IPv4 address request even if remote resolving was specified. - try: - ipaddr = socket.inet_aton(destaddr) - req = req + "\x01" + ipaddr - except socket.error: - # Well it's not an IP number, so it's probably a DNS name. - if self.__proxy[3]==True: - # Resolve remotely - ipaddr = None - req = req + "\x03" + chr(len(destaddr)) + destaddr - else: - # Resolve locally - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) - req = req + "\x01" + ipaddr - req = req + struct.pack(">H",destport) - self.sendall(req) - # Get the response - resp = self.__recvall(4) - if resp[0] != "\x05": - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - elif resp[1] != "\x00": - # Connection failed - self.close() - if ord(resp[1])<=8: - raise Socks5Error((ord(resp[1]),_generalerrors[ord(resp[1])])) - else: - raise Socks5Error((9,_generalerrors[9])) - # Get the bound address/port - elif resp[3] == "\x01": - boundaddr = self.__recvall(4) - elif resp[3] == "\x03": - resp = resp + self.recv(1) - boundaddr = self.__recvall(ord(resp[4])) - else: - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - boundport = struct.unpack(">H",self.__recvall(2))[0] - self.__proxysockname = (boundaddr,boundport) - if ipaddr != None: - self.__proxypeername = (socket.inet_ntoa(ipaddr),destport) - else: - self.__proxypeername = (destaddr,destport) - - def getproxysockname(self): - """getsockname() -> address info - Returns the bound IP address and port number at the proxy. - """ - return self.__proxysockname - - def getproxypeername(self): - """getproxypeername() -> address info - Returns the IP and port number of the proxy. - """ - return _orgsocket.getpeername(self) - - def getpeername(self): - """getpeername() -> address info - Returns the IP address and port number of the destination - machine (note: getproxypeername returns the proxy) - """ - return self.__proxypeername - - def __negotiatesocks4(self,destaddr,destport): - """__negotiatesocks4(self,destaddr,destport) - Negotiates a connection through a SOCKS4 server. - """ - # Check if the destination address provided is an IP address - rmtrslv = False - try: - ipaddr = socket.inet_aton(destaddr) - except socket.error: - # It's a DNS name. Check where it should be resolved. - if self.__proxy[3]==True: - ipaddr = "\x00\x00\x00\x01" - rmtrslv = True - else: - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) - # Construct the request packet - req = "\x04\x01" + struct.pack(">H",destport) + ipaddr - # The username parameter is considered userid for SOCKS4 - if self.__proxy[4] != None: - req = req + self.__proxy[4] - req = req + "\x00" - # DNS name if remote resolving is required - # NOTE: This is actually an extension to the SOCKS4 protocol - # called SOCKS4A and may not be supported in all cases. - if rmtrslv==True: - req = req + destaddr + "\x00" - self.sendall(req) - # Get the response from the server - resp = self.__recvall(8) - if resp[0] != "\x00": - # Bad data - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - if resp[1] != "\x5A": - # Server returned an error - self.close() - if ord(resp[1]) in (91,92,93): - self.close() - raise Socks4Error((ord(resp[1]),_socks4errors[ord(resp[1])-90])) - else: - raise Socks4Error((94,_socks4errors[4])) - # Get the bound address/port - self.__proxysockname = (socket.inet_ntoa(resp[4:]),struct.unpack(">H",resp[2:4])[0]) - if rmtrslv != None: - self.__proxypeername = (socket.inet_ntoa(ipaddr),destport) - else: - self.__proxypeername = (destaddr,destport) - - def __negotiatehttp(self,destaddr,destport): - """__negotiatehttp(self,destaddr,destport) - Negotiates a connection through an HTTP server. - """ - # If we need to resolve locally, we do this now - if self.__proxy[3] == False: - addr = socket.gethostbyname(destaddr) - else: - addr = destaddr - self.sendall("CONNECT " + addr + ":" + str(destport) + " HTTP/1.1\r\n" + "Host: " + destaddr + "\r\n\r\n") - # We read the response until we get the string "\r\n\r\n" - resp = self.recv(1) - while resp.find("\r\n\r\n")==-1: - resp = resp + self.recv(1) - # We just need the first line to check if the connection - # was successful - statusline = resp.splitlines()[0].split(" ",2) - if statusline[0] not in ("HTTP/1.0","HTTP/1.1"): - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - try: - statuscode = int(statusline[1]) - except ValueError: - self.close() - raise GeneralProxyError((1,_generalerrors[1])) - if statuscode != 200: - self.close() - raise HTTPError((statuscode,statusline[2])) - self.__proxysockname = ("0.0.0.0",0) - self.__proxypeername = (addr,destport) - - def connect(self,destpair): - """connect(self,despair) - Connects to the specified destination through a proxy. - destpar - A tuple of the IP/DNS address and the port number. - (identical to socket's connect). - To select the proxy server use setproxy(). - """ - # Do a minimal input check first - if (type(destpair) in (list,tuple)==False) or (len(destpair)<2) or (type(destpair[0])!=str) or (type(destpair[1])!=int): - raise GeneralProxyError((5,_generalerrors[5])) - if self.__proxy[0] == PROXY_TYPE_SOCKS5: - if self.__proxy[2] != None: - portnum = self.__proxy[2] - else: - portnum = 1080 - _orgsocket.connect(self,(self.__proxy[1],portnum)) - self.__negotiatesocks5(destpair[0],destpair[1]) - elif self.__proxy[0] == PROXY_TYPE_SOCKS4: - if self.__proxy[2] != None: - portnum = self.__proxy[2] - else: - portnum = 1080 - _orgsocket.connect(self,(self.__proxy[1],portnum)) - self.__negotiatesocks4(destpair[0],destpair[1]) - elif self.__proxy[0] == PROXY_TYPE_HTTP: - if self.__proxy[2] != None: - portnum = self.__proxy[2] - else: - portnum = 8080 - _orgsocket.connect(self,(self.__proxy[1],portnum)) - self.__negotiatehttp(destpair[0],destpair[1]) - elif self.__proxy[0] == None: - _orgsocket.connect(self,(destpair[0],destpair[1])) - else: - raise GeneralProxyError((4,_generalerrors[4])) + """socksocket([family[, type[, proto]]]) -> socket object + + Open a SOCKS enabled socket. The parameters are the same as + those of the standard socket init. In order for SOCKS to work, + you must specify family=AF_INET, type=SOCK_STREAM and proto=0. + """ + + def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None): + _orgsocket.__init__(self,family,type,proto,_sock) + if _defaultproxy != None: + self.__proxy = _defaultproxy + else: + self.__proxy = (None, None, None, None, None, None) + self.__proxysockname = None + self.__proxypeername = None + + def __recvall(self, bytes): + """__recvall(bytes) -> data + Receive EXACTLY the number of bytes requested from the socket. + Blocks until the required number of bytes have been received. + """ + data = "" + while len(data) < bytes: + d = self.recv(bytes-len(data)) + if not d: + raise GeneralProxyError("connection closed unexpectedly") + data = data + d + return data + + def setproxy(self,proxytype=None,addr=None,port=None,rdns=True,username=None,password=None): + """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) + Sets the proxy to be used. + proxytype - The type of the proxy to be used. Three types + are supported: PROXY_TYPE_SOCKS4 (including socks4a), + PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP + addr - The address of the server (IP or DNS). + port - The port of the server. Defaults to 1080 for SOCKS + servers and 8080 for HTTP proxy servers. + rdns - Should DNS queries be preformed on the remote side + (rather than the local side). The default is True. + Note: This has no effect with SOCKS4 servers. + username - Username to authenticate with to the server. + The default is no authentication. + password - Password to authenticate with to the server. + Only relevant when username is also provided. + """ + self.__proxy = (proxytype,addr,port,rdns,username,password) + + def __negotiatesocks5(self,destaddr,destport): + """__negotiatesocks5(self,destaddr,destport) + Negotiates a connection through a SOCKS5 server. + """ + # First we'll send the authentication packages we support. + if (self.__proxy[4]!=None) and (self.__proxy[5]!=None): + # The username/password details were supplied to the + # setproxy method so we support the USERNAME/PASSWORD + # authentication (in addition to the standard none). + self.sendall("\x05\x02\x00\x02") + else: + # No username/password were entered, therefore we + # only support connections with no authentication. + self.sendall("\x05\x01\x00") + # We'll receive the server's response to determine which + # method was selected + chosenauth = self.__recvall(2) + if chosenauth[0] != "\x05": + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + # Check the chosen authentication method + if chosenauth[1] == "\x00": + # No authentication is required + pass + elif chosenauth[1] == "\x02": + # Okay, we need to perform a basic username/password + # authentication. + self.sendall("\x01" + chr(len(self.__proxy[4])) + self.__proxy[4] + chr(len(self.__proxy[5])) + self.__proxy[5]) + authstat = self.__recvall(2) + if authstat[0] != "\x01": + # Bad response + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + if authstat[1] != "\x00": + # Authentication failed + self.close() + raise Socks5AuthError((3,_socks5autherrors[3])) + # Authentication succeeded + else: + # Reaching here is always bad + self.close() + if chosenauth[1] == "\xFF": + raise Socks5AuthError((2,_socks5autherrors[2])) + else: + raise GeneralProxyError((1,_generalerrors[1])) + # Now we can request the actual connection + req = "\x05\x01\x00" + # If the given destination address is an IP address, we'll + # use the IPv4 address request even if remote resolving was specified. + try: + ipaddr = socket.inet_aton(destaddr) + req = req + "\x01" + ipaddr + except socket.error: + # Well it's not an IP number, so it's probably a DNS name. + if self.__proxy[3]==True: + # Resolve remotely + ipaddr = None + req = req + "\x03" + chr(len(destaddr)) + destaddr + else: + # Resolve locally + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + req = req + "\x01" + ipaddr + req = req + struct.pack(">H",destport) + self.sendall(req) + # Get the response + resp = self.__recvall(4) + if resp[0] != "\x05": + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + elif resp[1] != "\x00": + # Connection failed + self.close() + if ord(resp[1])<=8: + raise Socks5Error((ord(resp[1]),_generalerrors[ord(resp[1])])) + else: + raise Socks5Error((9,_generalerrors[9])) + # Get the bound address/port + elif resp[3] == "\x01": + boundaddr = self.__recvall(4) + elif resp[3] == "\x03": + resp = resp + self.recv(1) + boundaddr = self.__recvall(ord(resp[4])) + else: + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + boundport = struct.unpack(">H",self.__recvall(2))[0] + self.__proxysockname = (boundaddr,boundport) + if ipaddr != None: + self.__proxypeername = (socket.inet_ntoa(ipaddr),destport) + else: + self.__proxypeername = (destaddr,destport) + + def getproxysockname(self): + """getsockname() -> address info + Returns the bound IP address and port number at the proxy. + """ + return self.__proxysockname + + def getproxypeername(self): + """getproxypeername() -> address info + Returns the IP and port number of the proxy. + """ + return _orgsocket.getpeername(self) + + def getpeername(self): + """getpeername() -> address info + Returns the IP address and port number of the destination + machine (note: getproxypeername returns the proxy) + """ + return self.__proxypeername + + def __negotiatesocks4(self,destaddr,destport): + """__negotiatesocks4(self,destaddr,destport) + Negotiates a connection through a SOCKS4 server. + """ + # Check if the destination address provided is an IP address + rmtrslv = False + try: + ipaddr = socket.inet_aton(destaddr) + except socket.error: + # It's a DNS name. Check where it should be resolved. + if self.__proxy[3]==True: + ipaddr = "\x00\x00\x00\x01" + rmtrslv = True + else: + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + # Construct the request packet + req = "\x04\x01" + struct.pack(">H",destport) + ipaddr + # The username parameter is considered userid for SOCKS4 + if self.__proxy[4] != None: + req = req + self.__proxy[4] + req = req + "\x00" + # DNS name if remote resolving is required + # NOTE: This is actually an extension to the SOCKS4 protocol + # called SOCKS4A and may not be supported in all cases. + if rmtrslv==True: + req = req + destaddr + "\x00" + self.sendall(req) + # Get the response from the server + resp = self.__recvall(8) + if resp[0] != "\x00": + # Bad data + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + if resp[1] != "\x5A": + # Server returned an error + self.close() + if ord(resp[1]) in (91,92,93): + self.close() + raise Socks4Error((ord(resp[1]),_socks4errors[ord(resp[1])-90])) + else: + raise Socks4Error((94,_socks4errors[4])) + # Get the bound address/port + self.__proxysockname = (socket.inet_ntoa(resp[4:]),struct.unpack(">H",resp[2:4])[0]) + if rmtrslv != None: + self.__proxypeername = (socket.inet_ntoa(ipaddr),destport) + else: + self.__proxypeername = (destaddr,destport) + + def __negotiatehttp(self,destaddr,destport): + """__negotiatehttp(self,destaddr,destport) + Negotiates a connection through an HTTP server. + """ + # If we need to resolve locally, we do this now + if self.__proxy[3] == False: + addr = socket.gethostbyname(destaddr) + else: + addr = destaddr + self.sendall("CONNECT " + addr + ":" + str(destport) + " HTTP/1.1\r\n" + "Host: " + destaddr + "\r\n\r\n") + # We read the response until we get the string "\r\n\r\n" + resp = self.recv(1) + while resp.find("\r\n\r\n")==-1: + resp = resp + self.recv(1) + # We just need the first line to check if the connection + # was successful + statusline = resp.splitlines()[0].split(" ",2) + if statusline[0] not in ("HTTP/1.0","HTTP/1.1"): + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + try: + statuscode = int(statusline[1]) + except ValueError: + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + if statuscode != 200: + self.close() + raise HTTPError((statuscode,statusline[2])) + self.__proxysockname = ("0.0.0.0",0) + self.__proxypeername = (addr,destport) + + def connect(self,destpair): + """connect(self,despair) + Connects to the specified destination through a proxy. + destpar - A tuple of the IP/DNS address and the port number. + (identical to socket's connect). + To select the proxy server use setproxy(). + """ + # Do a minimal input check first + if (type(destpair) in (list,tuple)==False) or (len(destpair)<2) or (type(destpair[0])!=str) or (type(destpair[1])!=int): + raise GeneralProxyError((5,_generalerrors[5])) + if self.__proxy[0] == PROXY_TYPE_SOCKS5: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 1080 + _orgsocket.connect(self,(self.__proxy[1],portnum)) + self.__negotiatesocks5(destpair[0],destpair[1]) + elif self.__proxy[0] == PROXY_TYPE_SOCKS4: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 1080 + _orgsocket.connect(self,(self.__proxy[1],portnum)) + self.__negotiatesocks4(destpair[0],destpair[1]) + elif self.__proxy[0] == PROXY_TYPE_HTTP: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 8080 + _orgsocket.connect(self,(self.__proxy[1],portnum)) + self.__negotiatehttp(destpair[0],destpair[1]) + elif self.__proxy[0] == None: + _orgsocket.connect(self,(destpair[0],destpair[1])) + else: + raise GeneralProxyError((4,_generalerrors[4])) From 7dafb384e97a508a4eb48fd0088dfbcbc5b4cd9c Mon Sep 17 00:00:00 2001 From: DoumanAsh Date: Wed, 8 Apr 2015 07:52:02 +0300 Subject: [PATCH 4/5] [search engine] Cosmetic update TorrentReactor --- .../nova/engines/torrentreactor.png | Bin 252 -> 951 bytes src/searchengine/nova/engines/torrentreactor.py | 9 +++++---- src/searchengine/nova/engines/versions.txt | 2 +- .../nova3/engines/torrentreactor.png | Bin 252 -> 951 bytes .../nova3/engines/torrentreactor.py | 9 +++++---- src/searchengine/nova3/engines/versions.txt | 2 +- 6 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/searchengine/nova/engines/torrentreactor.png b/src/searchengine/nova/engines/torrentreactor.png index d16a023ee28fd55118009205d3f0f42096842412..160726112aec0fef98b66a847e933229d1edcadc 100644 GIT binary patch delta 941 zcmV;e15*6_0k;Q`7=H)`0000V^Z#K0000JJOGiWi{{a60|De66lK=n!32;bRa{vGf z6951U69E94oEQKA16N5zK~y-)ZIfF}TLl1y|8wm*y_{a4r4)s>P&fj@oCNT;%)#au z3yCp4h?&U}9`@#o4`wDN#&=)X5@Ys+iyAVFVKK{0R3Z;%nSYomjFz@*yN#A!&Q5!J zJ-wad6Gn*t)Bo`Q-_!pSx~>yArmbMmt=3EDT55e5ng)hqb_5gk)9C0__IQ@~FJLPe zbSrA}2GLe8B%dasC}prL4V&2rhs6YjV%94}b!qO6Iq9%N9-7~GqbC}mLxP#Rz#CXR z_$z|ZK@mQ`h<}>Fj9^KH+*B6?k`J$~tqmUmd~##m)zb70lhGhpI0IZAR-8ILg}}%d z0x!G>uQ(3$`4P4_&=PFgla%B?0@%FuE4lkH_CP6=!LbZ%W**)yI}+(E@*0U>awXi} zml0t&806_7c5$jl>@6!CB;C&j8_J2#Y222c#okuAM-998+6#mE+ zkS=7AiH9+cI(xFOYu@AOJiWK~_jO|P{?2=3qw>v|lmOp1h`ZT16xt4}(FMA$pr(`f zDpLkg&*6)yQP_+;N@@i)RYkMeUL*|$Myx3cvXMt97wYJ?`OxBfpk$J;F*;6k3wYC5 zN3D>D&wu3v!*U>X0+m7$g>+h^4K&p-@CIzBa%kQD8C$v)b>9Gbxo3EF#0#g*jN>H#gin7*f-g@h`m(XfeVKoxC{?R3T{J}+BJv)g~LWUH|A@=7( zq+?+)cYecC?+~hWZB?9{lxYBHH^?g;R@=GDGtc!;2R zi11P7qATE?m|t8h9Qnej`Pq=&ZS-gMGe2Y!DWo4~@GQPBQ;IaZbmn~Y^LO8ei?e*^ zd%4$p_MMB-|9#`3mai^%7IV^>P$cO1dM0D_%ek@H>HYyReC_*d^6~6H57B0BCW=Fl P00000NkvXXu0mjfMl;Al delta 236 zcmVQ3-0!0}AGcd_BC@?Uv^}T!H!mRk?r(o6=HdZFN4RSLX?q#iwWzuM1IK{y5_dgSp z0mFZWJ>RagfQ(>1!;rzG$iSe?z>wAgmikp-xt~S&A%h@u6|X3hf&kFWKb*l#U>i9L mikKJ}9T;XYlph6J2mt^%r6Q4k&#LkO0000P&fj@oCNT;%)#au z3yCp4h?&U}9`@#o4`wDN#&=)X5@Ys+iyAVFVKK{0R3Z;%nSYomjFz@*yN#A!&Q5!J zJ-wad6Gn*t)Bo`Q-_!pSx~>yArmbMmt=3EDT55e5ng)hqb_5gk)9C0__IQ@~FJLPe zbSrA}2GLe8B%dasC}prL4V&2rhs6YjV%94}b!qO6Iq9%N9-7~GqbC}mLxP#Rz#CXR z_$z|ZK@mQ`h<}>Fj9^KH+*B6?k`J$~tqmUmd~##m)zb70lhGhpI0IZAR-8ILg}}%d z0x!G>uQ(3$`4P4_&=PFgla%B?0@%FuE4lkH_CP6=!LbZ%W**)yI}+(E@*0U>awXi} zml0t&806_7c5$jl>@6!CB;C&j8_J2#Y222c#okuAM-998+6#mE+ zkS=7AiH9+cI(xFOYu@AOJiWK~_jO|P{?2=3qw>v|lmOp1h`ZT16xt4}(FMA$pr(`f zDpLkg&*6)yQP_+;N@@i)RYkMeUL*|$Myx3cvXMt97wYJ?`OxBfpk$J;F*;6k3wYC5 zN3D>D&wu3v!*U>X0+m7$g>+h^4K&p-@CIzBa%kQD8C$v)b>9Gbxo3EF#0#g*jN>H#gin7*f-g@h`m(XfeVKoxC{?R3T{J}+BJv)g~LWUH|A@=7( zq+?+)cYecC?+~hWZB?9{lxYBHH^?g;R@=GDGtc!;2R zi11P7qATE?m|t8h9Qnej`Pq=&ZS-gMGe2Y!DWo4~@GQPBQ;IaZbmn~Y^LO8ei?e*^ zd%4$p_MMB-|9#`3mai^%7IV^>P$cO1dM0D_%ek@H>HYyReC_*d^6~6H57B0BCW=Fl P00000NkvXXu0mjfMl;Al delta 236 zcmVQ3-0!0}AGcd_BC@?Uv^}T!H!mRk?r(o6=HdZFN4RSLX?q#iwWzuM1IK{y5_dgSp z0mFZWJ>RagfQ(>1!;rzG$iSe?z>wAgmikp-xt~S&A%h@u6|X3hf&kFWKb*l#U>i9L mikKJ}9T;XYlph6J2mt^%r6Q4k&#LkO0000 Date: Sat, 11 Apr 2015 10:28:17 +0300 Subject: [PATCH 5/5] [search engine] Final enhancements. --- src/searchengine/nova/engines/extratorrent.py | 6 +++++- src/searchengine/nova/engines/mininova.py | 11 +++++------ src/searchengine/nova/engines/torrentreactor.py | 17 +++-------------- src/searchengine/nova/engines/torrentz.py | 4 ++-- src/searchengine/nova/engines/versions.txt | 2 +- src/searchengine/nova/nova2.py | 6 +++++- src/searchengine/nova/novaprinter.py | 2 +- src/searchengine/nova3/engines/extratorrent.py | 6 +++++- src/searchengine/nova3/engines/mininova.py | 11 +++++------ .../nova3/engines/torrentreactor.py | 17 +++-------------- src/searchengine/nova3/engines/torrentz.py | 2 +- src/searchengine/nova3/engines/versions.txt | 2 +- src/searchengine/nova3/nova2.py | 9 +++++++-- 13 files changed, 44 insertions(+), 51 deletions(-) diff --git a/src/searchengine/nova/engines/extratorrent.py b/src/searchengine/nova/engines/extratorrent.py index 19fce553c..4fe940b24 100644 --- a/src/searchengine/nova/engines/extratorrent.py +++ b/src/searchengine/nova/engines/extratorrent.py @@ -60,6 +60,7 @@ class extratorrent(object): self.pending_size = False self.next_queries = True self.pending_next_queries = False + self.next_queries_set = set() def handle_starttag(self, tag, attrs): if self.current_item: @@ -74,7 +75,7 @@ class extratorrent(object): #description self.current_item["desc_link"] = "".join((self.url, link)) #remove view at the beginning - self.current_item["name"] = params["title"][5:] + self.current_item["name"] = params["title"][5:].replace("&", "&") self.pending_size = True elif link[8] == "_": #download link @@ -108,7 +109,10 @@ class extratorrent(object): elif self.pending_next_queries: if tag == "a": params = dict(attrs) + if params["title"] in self.next_queries_set: + return self.list_searches.append(params['href']) + self.next_queries_set.add(params["title"]) if params["title"] == "10": self.pending_next_queries = False else: diff --git a/src/searchengine/nova/engines/mininova.py b/src/searchengine/nova/engines/mininova.py index dc132cd6c..e105a4f3b 100644 --- a/src/searchengine/nova/engines/mininova.py +++ b/src/searchengine/nova/engines/mininova.py @@ -68,12 +68,11 @@ class mininova(object): params = dict(attrs) link = params["href"] - if link.startswith("/get/"): - #download link - self.current_item["link"] = "".join((self.url, link)) - elif link.startswith("/tor/"): + if link.startswith("/tor/"): #description self.current_item["desc_link"] = "".join((self.url, link)) + #get download link from description by id + self.current_item["link"] = "".join((self.url, "/get/", link[5:-2])) self.cur_item_name = "name" self.current_item["name"] = "" elif self.next_queries and link.startswith("/search"): @@ -83,7 +82,7 @@ class mininova(object): def handle_starttag_td(self, attrs): """ Handler of td start tag """ if ("align", "right") in attrs: - if not "size" in self.current_item.keys(): + if not "size" in self.current_item: self.cur_item_name = "size" self.current_item["size"] = "" @@ -113,7 +112,7 @@ class mininova(object): prettyPrinter(self.current_item) self.current_item = None elif self.cur_item_name: - if tag == "a" or tag == "span": + if tag == "a" or tag == "td": self.cur_item_name = None def handle_data(self, data): diff --git a/src/searchengine/nova/engines/torrentreactor.py b/src/searchengine/nova/engines/torrentreactor.py index bff138f91..3f0ef7f65 100644 --- a/src/searchengine/nova/engines/torrentreactor.py +++ b/src/searchengine/nova/engines/torrentreactor.py @@ -28,10 +28,9 @@ # POSSIBILITY OF SUCH DAMAGE. from novaprinter import prettyPrinter -from helpers import download_file +from helpers import download_file, retrieve_url import urllib from HTMLParser import HTMLParser -from httplib import HTTPConnection as http from re import compile as re_compile class torrentreactor(object): @@ -100,23 +99,13 @@ class torrentreactor(object): def search(self, what, cat='all'): i = 0 dat = '' - connection = http("www.torrentreactor.net") - while True and i<11: + while i < 11: results = [] parser = self.SimpleHTMLParser(results, self.url) - query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]) - connection.request("GET", query) - response = connection.getresponse() - if response.status != 200: - break - - dat = response.read().decode('utf-8') - + dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat])) parser.feed(dat) parser.close() if len(results) <= 0: break i += 1 - - connection.close() diff --git a/src/searchengine/nova/engines/torrentz.py b/src/searchengine/nova/engines/torrentz.py index 2d7b5eef7..20a2b3be8 100644 --- a/src/searchengine/nova/engines/torrentz.py +++ b/src/searchengine/nova/engines/torrentz.py @@ -1,4 +1,4 @@ -#VERSION: 2.13 +#VERSION: 2.14 #AUTHORS: Diego de las Heras (diegodelasheras@gmail.com) # Redistribution and use in source and binary forms, with or without @@ -105,7 +105,7 @@ class torrentz(object): while i < 6: results_list = [] # "what" is already urlencoded - html = retrieve_url(self.url + '/any?f=%s&p=%d' % (what, i)) + html = retrieve_url('%s/any?f=%s&p=%d' % (self.url, what, i)) parser = self.MyHtmlParser(results_list, self.url, trackers) parser.feed(html) parser.close() diff --git a/src/searchengine/nova/engines/versions.txt b/src/searchengine/nova/engines/versions.txt index bbff49afd..c24143cfb 100644 --- a/src/searchengine/nova/engines/versions.txt +++ b/src/searchengine/nova/engines/versions.txt @@ -5,5 +5,5 @@ piratebay: 2.11 extratorrent: 2.0 kickasstorrents: 1.26 btdigg: 1.24 -torrentz: 2.13 +torrentz: 2.14 legittorrents: 1.03 diff --git a/src/searchengine/nova/nova2.py b/src/searchengine/nova/nova2.py index cef9681b7..d54f5c16d 100644 --- a/src/searchengine/nova/nova2.py +++ b/src/searchengine/nova/nova2.py @@ -117,6 +117,8 @@ def displayCapabilities(supported_engines): def run_search(engine_list): """ Run search in engine + @param engine_list List with engine, query and category + @retval False if any exceptions occured @retval True otherwise """ @@ -149,6 +151,7 @@ def main(args): raise SystemExit("./nova2.py [all|engine1[,engine2]*] \n" "available engines: %s" % (','.join(supported_engines))) + #get only unique engines with set engines_list = set(e.lower() for e in args[0].strip().split(',')) if 'all' in engines_list: @@ -170,10 +173,11 @@ def main(args): what = urllib.quote(' '.join(args[2:])) if THREADED: + #child process spawning is controlled min(number of searches, number of cpu) pool = Pool(min(len(engines_list), cpu_count())) pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list)) else: - _ = [run_search([globals()[engine], what, cat]) for engine in engines_list] + map(run_search, ([globals()[engine], what, cat] for engine in engines_list)) if __name__ == "__main__": main(argv[1:]) diff --git a/src/searchengine/nova/novaprinter.py b/src/searchengine/nova/novaprinter.py index fc16949e6..9cc598f14 100644 --- a/src/searchengine/nova/novaprinter.py +++ b/src/searchengine/nova/novaprinter.py @@ -37,7 +37,7 @@ def prettyPrinter(dictionary): outtext = "|".join((outtext, dictionary["desc_link"])) with open(1, 'w', encoding='utf-8', closefd=False) as utf8_stdout: - utf8_stdout.write("".join((outtext, "\n"))) + utf8_stdout.write(unicode("".join((outtext, "\n")))) def anySizeToBytes(size_string): """ diff --git a/src/searchengine/nova3/engines/extratorrent.py b/src/searchengine/nova3/engines/extratorrent.py index de3dcb9a2..d0bd10bb8 100644 --- a/src/searchengine/nova3/engines/extratorrent.py +++ b/src/searchengine/nova3/engines/extratorrent.py @@ -60,6 +60,7 @@ class extratorrent(object): self.pending_size = False self.next_queries = True self.pending_next_queries = False + self.next_queries_set = set() def handle_starttag(self, tag, attrs): if self.current_item: @@ -74,7 +75,7 @@ class extratorrent(object): #description self.current_item["desc_link"] = "".join((self.url, link)) #remove view at the beginning - self.current_item["name"] = params["title"][5:] + self.current_item["name"] = params["title"][5:].replace("&", "&") self.pending_size = True elif link[8] == "_": #download link @@ -108,7 +109,10 @@ class extratorrent(object): elif self.pending_next_queries: if tag == "a": params = dict(attrs) + if params["title"] in self.next_queries_set: + return self.list_searches.append(params['href']) + self.next_queries_set.add(params["title"]) if params["title"] == "10": self.pending_next_queries = False else: diff --git a/src/searchengine/nova3/engines/mininova.py b/src/searchengine/nova3/engines/mininova.py index 12544db09..b402c70c1 100644 --- a/src/searchengine/nova3/engines/mininova.py +++ b/src/searchengine/nova3/engines/mininova.py @@ -68,12 +68,11 @@ class mininova(object): params = dict(attrs) link = params["href"] - if link.startswith("/get/"): - #download link - self.current_item["link"] = "".join((self.url, link)) - elif link.startswith("/tor/"): + if link.startswith("/tor/"): #description self.current_item["desc_link"] = "".join((self.url, link)) + #get download link from description by id + self.current_item["link"] = "".join((self.url, "/get/", link[5:-2])) self.cur_item_name = "name" self.current_item["name"] = "" elif self.next_queries and link.startswith("/search"): @@ -83,7 +82,7 @@ class mininova(object): def handle_starttag_td(self, attrs): """ Handler of td start tag """ if ("align", "right") in attrs: - if not "size" in self.current_item.keys(): + if not "size" in self.current_item: self.cur_item_name = "size" self.current_item["size"] = "" @@ -113,7 +112,7 @@ class mininova(object): prettyPrinter(self.current_item) self.current_item = None elif self.cur_item_name: - if tag == "a" or tag == "span": + if tag == "a" or tag == "td": self.cur_item_name = None def handle_data(self, data): diff --git a/src/searchengine/nova3/engines/torrentreactor.py b/src/searchengine/nova3/engines/torrentreactor.py index e4005663c..6782ae450 100644 --- a/src/searchengine/nova3/engines/torrentreactor.py +++ b/src/searchengine/nova3/engines/torrentreactor.py @@ -28,10 +28,9 @@ # POSSIBILITY OF SUCH DAMAGE. from novaprinter import prettyPrinter -from helpers import download_file +from helpers import download_file, retrieve_url from urllib import parse from html.parser import HTMLParser -from http.client import HTTPConnection as http from re import compile as re_compile class torrentreactor(object): @@ -100,23 +99,13 @@ class torrentreactor(object): def search(self, what, cat='all'): i = 0 dat = '' - connection = http("www.torrentreactor.net") - while True and i<11: + while i < 11: results = [] parser = self.SimpleHTMLParser(results, self.url) - query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]) - connection.request("GET", query) - response = connection.getresponse() - if response.status != 200: - break - - dat = response.read().decode('utf-8') - + dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat])) parser.feed(dat) parser.close() if len(results) <= 0: break i += 1 - - connection.close() diff --git a/src/searchengine/nova3/engines/torrentz.py b/src/searchengine/nova3/engines/torrentz.py index 9ced90268..d6c117f42 100644 --- a/src/searchengine/nova3/engines/torrentz.py +++ b/src/searchengine/nova3/engines/torrentz.py @@ -1,4 +1,4 @@ -#VERSION: 2.13 +#VERSION: 2.14 #AUTHORS: Diego de las Heras (diegodelasheras@gmail.com) # Redistribution and use in source and binary forms, with or without diff --git a/src/searchengine/nova3/engines/versions.txt b/src/searchengine/nova3/engines/versions.txt index 479082923..b5300792a 100644 --- a/src/searchengine/nova3/engines/versions.txt +++ b/src/searchengine/nova3/engines/versions.txt @@ -5,5 +5,5 @@ piratebay: 2.11 extratorrent: 2.0 kickasstorrents: 1.26 btdigg: 1.23 -torrentz: 2.13 +torrentz: 2.14 legittorrents: 1.04 diff --git a/src/searchengine/nova3/nova2.py b/src/searchengine/nova3/nova2.py index c67852db3..3b483fdd7 100644 --- a/src/searchengine/nova3/nova2.py +++ b/src/searchengine/nova3/nova2.py @@ -116,6 +116,8 @@ def displayCapabilities(supported_engines): def run_search(engine_list): """ Run search in engine + @param engine_list List with engine, query and category + @retval False if any exceptions occured @retval True otherwise """ @@ -128,6 +130,7 @@ def run_search(engine_list): engine.search(what, cat) else: engine.search(what) + return True except: return False @@ -147,6 +150,7 @@ def main(args): raise SystemExit("./nova2.py [all|engine1[,engine2]*] \n" "available engines: %s" % (','.join(supported_engines))) + #get only unique engines with set engines_list = set(e.lower() for e in args[0].strip().split(',')) if 'all' in engines_list: @@ -166,12 +170,13 @@ def main(args): raise SystemExit(" - ".join(('Invalid category', cat))) what = urllib.parse.quote(' '.join(args[2:])) - if THREADED: + #child process spawning is controlled min(number of searches, number of cpu) with Pool(min(len(engines_list), cpu_count())) as pool: pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list)) else: - _ = [run_search([globals()[engine], what, cat]) for engine in engines_list] + #py3 note: map is needed to be evaluated for content to be executed + all(map(run_search, ([globals()[engine], what, cat] for engine in engines_list))) if __name__ == "__main__": main(argv[1:])