mirror of
https://github.com/qbittorrent/qBittorrent
synced 2025-08-21 22:03:27 -07:00
[search engine] Final enhancements.
This commit is contained in:
parent
639f5b2d20
commit
09dedd0f22
13 changed files with 44 additions and 51 deletions
|
@ -60,6 +60,7 @@ class extratorrent(object):
|
||||||
self.pending_size = False
|
self.pending_size = False
|
||||||
self.next_queries = True
|
self.next_queries = True
|
||||||
self.pending_next_queries = False
|
self.pending_next_queries = False
|
||||||
|
self.next_queries_set = set()
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if self.current_item:
|
if self.current_item:
|
||||||
|
@ -74,7 +75,7 @@ class extratorrent(object):
|
||||||
#description
|
#description
|
||||||
self.current_item["desc_link"] = "".join((self.url, link))
|
self.current_item["desc_link"] = "".join((self.url, link))
|
||||||
#remove view at the beginning
|
#remove view at the beginning
|
||||||
self.current_item["name"] = params["title"][5:]
|
self.current_item["name"] = params["title"][5:].replace("&amp;", "&")
|
||||||
self.pending_size = True
|
self.pending_size = True
|
||||||
elif link[8] == "_":
|
elif link[8] == "_":
|
||||||
#download link
|
#download link
|
||||||
|
@ -108,7 +109,10 @@ class extratorrent(object):
|
||||||
elif self.pending_next_queries:
|
elif self.pending_next_queries:
|
||||||
if tag == "a":
|
if tag == "a":
|
||||||
params = dict(attrs)
|
params = dict(attrs)
|
||||||
|
if params["title"] in self.next_queries_set:
|
||||||
|
return
|
||||||
self.list_searches.append(params['href'])
|
self.list_searches.append(params['href'])
|
||||||
|
self.next_queries_set.add(params["title"])
|
||||||
if params["title"] == "10":
|
if params["title"] == "10":
|
||||||
self.pending_next_queries = False
|
self.pending_next_queries = False
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -68,12 +68,11 @@ class mininova(object):
|
||||||
params = dict(attrs)
|
params = dict(attrs)
|
||||||
link = params["href"]
|
link = params["href"]
|
||||||
|
|
||||||
if link.startswith("/get/"):
|
if link.startswith("/tor/"):
|
||||||
#download link
|
|
||||||
self.current_item["link"] = "".join((self.url, link))
|
|
||||||
elif link.startswith("/tor/"):
|
|
||||||
#description
|
#description
|
||||||
self.current_item["desc_link"] = "".join((self.url, link))
|
self.current_item["desc_link"] = "".join((self.url, link))
|
||||||
|
#get download link from description by id
|
||||||
|
self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
|
||||||
self.cur_item_name = "name"
|
self.cur_item_name = "name"
|
||||||
self.current_item["name"] = ""
|
self.current_item["name"] = ""
|
||||||
elif self.next_queries and link.startswith("/search"):
|
elif self.next_queries and link.startswith("/search"):
|
||||||
|
@ -83,7 +82,7 @@ class mininova(object):
|
||||||
def handle_starttag_td(self, attrs):
|
def handle_starttag_td(self, attrs):
|
||||||
""" Handler of td start tag """
|
""" Handler of td start tag """
|
||||||
if ("align", "right") in attrs:
|
if ("align", "right") in attrs:
|
||||||
if not "size" in self.current_item.keys():
|
if not "size" in self.current_item:
|
||||||
self.cur_item_name = "size"
|
self.cur_item_name = "size"
|
||||||
self.current_item["size"] = ""
|
self.current_item["size"] = ""
|
||||||
|
|
||||||
|
@ -113,7 +112,7 @@ class mininova(object):
|
||||||
prettyPrinter(self.current_item)
|
prettyPrinter(self.current_item)
|
||||||
self.current_item = None
|
self.current_item = None
|
||||||
elif self.cur_item_name:
|
elif self.cur_item_name:
|
||||||
if tag == "a" or tag == "span":
|
if tag == "a" or tag == "td":
|
||||||
self.cur_item_name = None
|
self.cur_item_name = None
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
|
|
|
@ -28,10 +28,9 @@
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
from novaprinter import prettyPrinter
|
from novaprinter import prettyPrinter
|
||||||
from helpers import download_file
|
from helpers import download_file, retrieve_url
|
||||||
import urllib
|
import urllib
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser
|
||||||
from httplib import HTTPConnection as http
|
|
||||||
from re import compile as re_compile
|
from re import compile as re_compile
|
||||||
|
|
||||||
class torrentreactor(object):
|
class torrentreactor(object):
|
||||||
|
@ -100,23 +99,13 @@ class torrentreactor(object):
|
||||||
def search(self, what, cat='all'):
|
def search(self, what, cat='all'):
|
||||||
i = 0
|
i = 0
|
||||||
dat = ''
|
dat = ''
|
||||||
connection = http("www.torrentreactor.net")
|
|
||||||
|
|
||||||
while True and i<11:
|
while i < 11:
|
||||||
results = []
|
results = []
|
||||||
parser = self.SimpleHTMLParser(results, self.url)
|
parser = self.SimpleHTMLParser(results, self.url)
|
||||||
query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])
|
dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
|
||||||
connection.request("GET", query)
|
|
||||||
response = connection.getresponse()
|
|
||||||
if response.status != 200:
|
|
||||||
break
|
|
||||||
|
|
||||||
dat = response.read().decode('utf-8')
|
|
||||||
|
|
||||||
parser.feed(dat)
|
parser.feed(dat)
|
||||||
parser.close()
|
parser.close()
|
||||||
if len(results) <= 0:
|
if len(results) <= 0:
|
||||||
break
|
break
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
connection.close()
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#VERSION: 2.13
|
#VERSION: 2.14
|
||||||
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
|
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
|
||||||
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -105,7 +105,7 @@ class torrentz(object):
|
||||||
while i < 6:
|
while i < 6:
|
||||||
results_list = []
|
results_list = []
|
||||||
# "what" is already urlencoded
|
# "what" is already urlencoded
|
||||||
html = retrieve_url(self.url + '/any?f=%s&p=%d' % (what, i))
|
html = retrieve_url('%s/any?f=%s&p=%d' % (self.url, what, i))
|
||||||
parser = self.MyHtmlParser(results_list, self.url, trackers)
|
parser = self.MyHtmlParser(results_list, self.url, trackers)
|
||||||
parser.feed(html)
|
parser.feed(html)
|
||||||
parser.close()
|
parser.close()
|
||||||
|
|
|
@ -5,5 +5,5 @@ piratebay: 2.11
|
||||||
extratorrent: 2.0
|
extratorrent: 2.0
|
||||||
kickasstorrents: 1.26
|
kickasstorrents: 1.26
|
||||||
btdigg: 1.24
|
btdigg: 1.24
|
||||||
torrentz: 2.13
|
torrentz: 2.14
|
||||||
legittorrents: 1.03
|
legittorrents: 1.03
|
||||||
|
|
|
@ -117,6 +117,8 @@ def displayCapabilities(supported_engines):
|
||||||
def run_search(engine_list):
|
def run_search(engine_list):
|
||||||
""" Run search in engine
|
""" Run search in engine
|
||||||
|
|
||||||
|
@param engine_list List with engine, query and category
|
||||||
|
|
||||||
@retval False if any exceptions occurred
|
@retval False if any exceptions occurred
|
||||||
@retval True otherwise
|
@retval True otherwise
|
||||||
"""
|
"""
|
||||||
|
@ -149,6 +151,7 @@ def main(args):
|
||||||
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
|
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
|
||||||
"available engines: %s" % (','.join(supported_engines)))
|
"available engines: %s" % (','.join(supported_engines)))
|
||||||
|
|
||||||
|
#get only unique engines with set
|
||||||
engines_list = set(e.lower() for e in args[0].strip().split(','))
|
engines_list = set(e.lower() for e in args[0].strip().split(','))
|
||||||
|
|
||||||
if 'all' in engines_list:
|
if 'all' in engines_list:
|
||||||
|
@ -170,10 +173,11 @@ def main(args):
|
||||||
what = urllib.quote(' '.join(args[2:]))
|
what = urllib.quote(' '.join(args[2:]))
|
||||||
|
|
||||||
if THREADED:
|
if THREADED:
|
||||||
|
#the number of child processes is limited to min(number of searches, number of CPUs)
|
||||||
pool = Pool(min(len(engines_list), cpu_count()))
|
pool = Pool(min(len(engines_list), cpu_count()))
|
||||||
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
|
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
|
||||||
else:
|
else:
|
||||||
_ = [run_search([globals()[engine], what, cat]) for engine in engines_list]
|
map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main(argv[1:])
|
main(argv[1:])
|
||||||
|
|
|
@ -37,7 +37,7 @@ def prettyPrinter(dictionary):
|
||||||
outtext = "|".join((outtext, dictionary["desc_link"]))
|
outtext = "|".join((outtext, dictionary["desc_link"]))
|
||||||
|
|
||||||
with open(1, 'w', encoding='utf-8', closefd=False) as utf8_stdout:
|
with open(1, 'w', encoding='utf-8', closefd=False) as utf8_stdout:
|
||||||
utf8_stdout.write("".join((outtext, "\n")))
|
utf8_stdout.write(unicode("".join((outtext, "\n"))))
|
||||||
|
|
||||||
def anySizeToBytes(size_string):
|
def anySizeToBytes(size_string):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -60,6 +60,7 @@ class extratorrent(object):
|
||||||
self.pending_size = False
|
self.pending_size = False
|
||||||
self.next_queries = True
|
self.next_queries = True
|
||||||
self.pending_next_queries = False
|
self.pending_next_queries = False
|
||||||
|
self.next_queries_set = set()
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if self.current_item:
|
if self.current_item:
|
||||||
|
@ -74,7 +75,7 @@ class extratorrent(object):
|
||||||
#description
|
#description
|
||||||
self.current_item["desc_link"] = "".join((self.url, link))
|
self.current_item["desc_link"] = "".join((self.url, link))
|
||||||
#remove view at the beginning
|
#remove view at the beginning
|
||||||
self.current_item["name"] = params["title"][5:]
|
self.current_item["name"] = params["title"][5:].replace("&amp;", "&")
|
||||||
self.pending_size = True
|
self.pending_size = True
|
||||||
elif link[8] == "_":
|
elif link[8] == "_":
|
||||||
#download link
|
#download link
|
||||||
|
@ -108,7 +109,10 @@ class extratorrent(object):
|
||||||
elif self.pending_next_queries:
|
elif self.pending_next_queries:
|
||||||
if tag == "a":
|
if tag == "a":
|
||||||
params = dict(attrs)
|
params = dict(attrs)
|
||||||
|
if params["title"] in self.next_queries_set:
|
||||||
|
return
|
||||||
self.list_searches.append(params['href'])
|
self.list_searches.append(params['href'])
|
||||||
|
self.next_queries_set.add(params["title"])
|
||||||
if params["title"] == "10":
|
if params["title"] == "10":
|
||||||
self.pending_next_queries = False
|
self.pending_next_queries = False
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -68,12 +68,11 @@ class mininova(object):
|
||||||
params = dict(attrs)
|
params = dict(attrs)
|
||||||
link = params["href"]
|
link = params["href"]
|
||||||
|
|
||||||
if link.startswith("/get/"):
|
if link.startswith("/tor/"):
|
||||||
#download link
|
|
||||||
self.current_item["link"] = "".join((self.url, link))
|
|
||||||
elif link.startswith("/tor/"):
|
|
||||||
#description
|
#description
|
||||||
self.current_item["desc_link"] = "".join((self.url, link))
|
self.current_item["desc_link"] = "".join((self.url, link))
|
||||||
|
#get download link from description by id
|
||||||
|
self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
|
||||||
self.cur_item_name = "name"
|
self.cur_item_name = "name"
|
||||||
self.current_item["name"] = ""
|
self.current_item["name"] = ""
|
||||||
elif self.next_queries and link.startswith("/search"):
|
elif self.next_queries and link.startswith("/search"):
|
||||||
|
@ -83,7 +82,7 @@ class mininova(object):
|
||||||
def handle_starttag_td(self, attrs):
|
def handle_starttag_td(self, attrs):
|
||||||
""" Handler of td start tag """
|
""" Handler of td start tag """
|
||||||
if ("align", "right") in attrs:
|
if ("align", "right") in attrs:
|
||||||
if not "size" in self.current_item.keys():
|
if not "size" in self.current_item:
|
||||||
self.cur_item_name = "size"
|
self.cur_item_name = "size"
|
||||||
self.current_item["size"] = ""
|
self.current_item["size"] = ""
|
||||||
|
|
||||||
|
@ -113,7 +112,7 @@ class mininova(object):
|
||||||
prettyPrinter(self.current_item)
|
prettyPrinter(self.current_item)
|
||||||
self.current_item = None
|
self.current_item = None
|
||||||
elif self.cur_item_name:
|
elif self.cur_item_name:
|
||||||
if tag == "a" or tag == "span":
|
if tag == "a" or tag == "td":
|
||||||
self.cur_item_name = None
|
self.cur_item_name = None
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
|
|
|
@ -28,10 +28,9 @@
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
from novaprinter import prettyPrinter
|
from novaprinter import prettyPrinter
|
||||||
from helpers import download_file
|
from helpers import download_file, retrieve_url
|
||||||
from urllib import parse
|
from urllib import parse
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
from http.client import HTTPConnection as http
|
|
||||||
from re import compile as re_compile
|
from re import compile as re_compile
|
||||||
|
|
||||||
class torrentreactor(object):
|
class torrentreactor(object):
|
||||||
|
@ -100,23 +99,13 @@ class torrentreactor(object):
|
||||||
def search(self, what, cat='all'):
|
def search(self, what, cat='all'):
|
||||||
i = 0
|
i = 0
|
||||||
dat = ''
|
dat = ''
|
||||||
connection = http("www.torrentreactor.net")
|
|
||||||
|
|
||||||
while True and i<11:
|
while i < 11:
|
||||||
results = []
|
results = []
|
||||||
parser = self.SimpleHTMLParser(results, self.url)
|
parser = self.SimpleHTMLParser(results, self.url)
|
||||||
query = '/torrents-search/%s/%d?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat])
|
dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
|
||||||
connection.request("GET", query)
|
|
||||||
response = connection.getresponse()
|
|
||||||
if response.status != 200:
|
|
||||||
break
|
|
||||||
|
|
||||||
dat = response.read().decode('utf-8')
|
|
||||||
|
|
||||||
parser.feed(dat)
|
parser.feed(dat)
|
||||||
parser.close()
|
parser.close()
|
||||||
if len(results) <= 0:
|
if len(results) <= 0:
|
||||||
break
|
break
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
connection.close()
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#VERSION: 2.13
|
#VERSION: 2.14
|
||||||
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
|
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
|
||||||
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
|
|
@ -5,5 +5,5 @@ piratebay: 2.11
|
||||||
extratorrent: 2.0
|
extratorrent: 2.0
|
||||||
kickasstorrents: 1.26
|
kickasstorrents: 1.26
|
||||||
btdigg: 1.23
|
btdigg: 1.23
|
||||||
torrentz: 2.13
|
torrentz: 2.14
|
||||||
legittorrents: 1.04
|
legittorrents: 1.04
|
||||||
|
|
|
@ -116,6 +116,8 @@ def displayCapabilities(supported_engines):
|
||||||
def run_search(engine_list):
|
def run_search(engine_list):
|
||||||
""" Run search in engine
|
""" Run search in engine
|
||||||
|
|
||||||
|
@param engine_list List with engine, query and category
|
||||||
|
|
||||||
@retval False if any exceptions occurred
|
@retval False if any exceptions occurred
|
||||||
@retval True otherwise
|
@retval True otherwise
|
||||||
"""
|
"""
|
||||||
|
@ -128,6 +130,7 @@ def run_search(engine_list):
|
||||||
engine.search(what, cat)
|
engine.search(what, cat)
|
||||||
else:
|
else:
|
||||||
engine.search(what)
|
engine.search(what)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
|
@ -147,6 +150,7 @@ def main(args):
|
||||||
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
|
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
|
||||||
"available engines: %s" % (','.join(supported_engines)))
|
"available engines: %s" % (','.join(supported_engines)))
|
||||||
|
|
||||||
|
#get only unique engines with set
|
||||||
engines_list = set(e.lower() for e in args[0].strip().split(','))
|
engines_list = set(e.lower() for e in args[0].strip().split(','))
|
||||||
|
|
||||||
if 'all' in engines_list:
|
if 'all' in engines_list:
|
||||||
|
@ -166,12 +170,13 @@ def main(args):
|
||||||
raise SystemExit(" - ".join(('Invalid category', cat)))
|
raise SystemExit(" - ".join(('Invalid category', cat)))
|
||||||
|
|
||||||
what = urllib.parse.quote(' '.join(args[2:]))
|
what = urllib.parse.quote(' '.join(args[2:]))
|
||||||
|
|
||||||
if THREADED:
|
if THREADED:
|
||||||
|
#the number of child processes is limited to min(number of searches, number of CPUs)
|
||||||
with Pool(min(len(engines_list), cpu_count())) as pool:
|
with Pool(min(len(engines_list), cpu_count())) as pool:
|
||||||
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
|
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
|
||||||
else:
|
else:
|
||||||
_ = [run_search([globals()[engine], what, cat]) for engine in engines_list]
|
#py3 note: map is lazy, so it must be consumed for the searches to run (NOTE: all() stops at the first False result)
|
||||||
|
all(map(run_search, ([globals()[engine], what, cat] for engine in engines_list)))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main(argv[1:])
|
main(argv[1:])
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue