Mirror of https://github.com/qbittorrent/qBittorrent
Merge pull request #2550 from DoumanAsh/multiprocessor_search
[search engine] Replace threading with multiprocessing
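At a glance, the change replaces the one-thread-per-engine EngineLauncher with one worker process per engine: each engine's search() becomes a job handed to a multiprocessing.Pool sized by min(number of searches, number of CPUs). The following is a minimal sketch of that shape, not the committed nova2.py; DummyEngine and the job tuples are placeholders for real search engines.

from multiprocessing import Pool, cpu_count

def run_search(job):
    # Run a single engine search in a worker process (same shape as nova2.run_search).
    engine_class, what, cat = job
    try:
        engine_class().search(what, cat)  # engines print their own results to stdout
        return True
    except Exception:
        return False

class DummyEngine(object):
    # Placeholder engine used only for this illustration.
    def search(self, what, cat='all'):
        print("searching '%s' in category '%s'" % (what, cat))

if __name__ == '__main__':
    jobs = [(DummyEngine, "ubuntu", "all"), (DummyEngine, "ubuntu", "software")]
    pool = Pool(min(len(jobs), cpu_count()))  # cap workers at searches vs. CPUs
    pool.map(run_search, jobs)
    pool.close()
    pool.join()

Since each engine runs in its own interpreter process, slow or CPU-bound parsing in one engine no longer holds the others back on the GIL.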
This commit is contained in commit 3d40834c57.
22 changed files with 1650 additions and 1470 deletions
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 1.2
|
||||
#VERSION: 2.0
|
||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,92 +25,139 @@
|
|||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
from HTMLParser import HTMLParser
|
||||
from httplib import HTTPConnection as http
|
||||
#qBt
|
||||
from novaprinter import prettyPrinter
|
||||
from helpers import retrieve_url, download_file
|
||||
import sgmllib
|
||||
import re
|
||||
from helpers import download_file
|
||||
|
||||
class extratorrent(object):
|
||||
""" Search engine class """
|
||||
url = 'http://extratorrent.cc'
|
||||
name = 'extratorrent'
|
||||
supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleSGMLParser(self.results, self.url)
|
||||
name = 'ExtraTorrent'
|
||||
supported_categories = {'all' : '0',
|
||||
'movies' : '4',
|
||||
'tv' : '8',
|
||||
'music' : '5',
|
||||
'games' : '3',
|
||||
'anime' : '1',
|
||||
'software' : '7',
|
||||
'books' : '2',
|
||||
'pictures' : '6'}
|
||||
|
||||
def download_torrent(self, info):
|
||||
print download_file(info)
|
||||
""" Downloader """
|
||||
print(download_file(info))
|
||||
|
||||
class SimpleSGMLParser(sgmllib.SGMLParser):
|
||||
def __init__(self, results, url, *args):
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||
""" Parser class """
|
||||
def __init__(self, list_searches, url):
|
||||
HTMLParser.__init__(self)
|
||||
self.url = url
|
||||
self.td_counter = None
|
||||
self.list_searches = list_searches
|
||||
self.current_item = None
|
||||
self.start_name = False
|
||||
self.results = results
|
||||
self.cur_item_name = None
|
||||
self.pending_size = False
|
||||
self.next_queries = True
|
||||
self.pending_next_queries = False
|
||||
self.next_queries_set = set()
|
||||
|
||||
def start_a(self, attr):
|
||||
params = dict(attr)
|
||||
#print params
|
||||
if params.has_key('href') and params['href'].startswith("/torrent_download/"):
|
||||
self.current_item = {}
|
||||
self.td_counter = 0
|
||||
self.start_name = False
|
||||
torrent_id = '/'.join(params['href'].split('/')[2:])
|
||||
self.current_item['link']=self.url+'/download/'+torrent_id
|
||||
elif params.has_key('href') and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
|
||||
self.current_item['desc_link'] = self.url + params['href'].strip()
|
||||
self.start_name = True
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if self.current_item:
|
||||
if tag == "a":
|
||||
params = dict(attrs)
|
||||
link = params['href']
|
||||
|
||||
if not link.startswith("/torrent"):
|
||||
return
|
||||
|
||||
if link[8] == "/":
|
||||
#description
|
||||
self.current_item["desc_link"] = "".join((self.url, link))
|
||||
#strip the leading "view " prefix from the title
|
||||
self.current_item["name"] = params["title"][5:].replace("&", "&")
|
||||
self.pending_size = True
|
||||
elif link[8] == "_":
|
||||
#download link
|
||||
link = link.replace("torrent_", "", 1)
|
||||
self.current_item["link"] = "".join((self.url, link))
|
||||
|
||||
elif tag == "td":
|
||||
if self.pending_size:
|
||||
self.cur_item_name = "size"
|
||||
self.current_item["size"] = ""
|
||||
self.pending_size = False
|
||||
|
||||
for attr in attrs:
|
||||
if attr[0] == "class":
|
||||
if attr[1][0] == "s":
|
||||
self.cur_item_name = "seeds"
|
||||
self.current_item["seeds"] = ""
|
||||
elif attr[1][0] == "l":
|
||||
self.cur_item_name = "leech"
|
||||
self.current_item["leech"] = ""
|
||||
break
|
||||
|
||||
|
||||
elif tag == "tr":
|
||||
for attr in attrs:
|
||||
if attr[0] == "class" and attr[1].startswith("tl"):
|
||||
self.current_item = dict()
|
||||
self.current_item["engine_url"] = self.url
|
||||
break
|
||||
|
||||
elif self.pending_next_queries:
|
||||
if tag == "a":
|
||||
params = dict(attrs)
|
||||
if params["title"] in self.next_queries_set:
|
||||
return
|
||||
self.list_searches.append(params['href'])
|
||||
self.next_queries_set.add(params["title"])
|
||||
if params["title"] == "10":
|
||||
self.pending_next_queries = False
|
||||
else:
|
||||
self.pending_next_queries = False
|
||||
|
||||
elif self.next_queries:
|
||||
if tag == "b" and ("class", "pager_no_link") in attrs:
|
||||
self.next_queries = False
|
||||
self.pending_next_queries = True
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.td_counter == 2:
|
||||
if not self.current_item.has_key('name') and self.start_name:
|
||||
self.current_item['name'] = data.strip()
|
||||
elif self.td_counter == 3:
|
||||
if not self.current_item.has_key('size'):
|
||||
self.current_item['size'] = ''
|
||||
self.current_item['size']+= data.replace(" ", " ").strip()
|
||||
elif self.td_counter == 4:
|
||||
if not self.current_item.has_key('seeds'):
|
||||
self.current_item['seeds'] = ''
|
||||
self.current_item['seeds']+= data.strip()
|
||||
elif self.td_counter == 5:
|
||||
if not self.current_item.has_key('leech'):
|
||||
self.current_item['leech'] = ''
|
||||
self.current_item['leech']+= data.strip()
|
||||
if self.cur_item_name:
|
||||
temp = self.current_item[self.cur_item_name]
|
||||
self.current_item[self.cur_item_name] = " ".join((temp, data))
|
||||
#handle_data can be called more than once for the size value (e.g. around a space), so keep cur_item_name set until the size field is complete
|
||||
if not self.cur_item_name == "size":
|
||||
self.cur_item_name = None
|
||||
|
||||
def start_td(self,attr):
|
||||
if isinstance(self.td_counter,int):
|
||||
self.td_counter += 1
|
||||
if self.td_counter > 5:
|
||||
self.td_counter = None
|
||||
# Display item
|
||||
def handle_endtag(self, tag):
|
||||
if self.current_item:
|
||||
self.current_item['engine_url'] = self.url
|
||||
if not self.current_item['seeds'].isdigit():
|
||||
self.current_item['seeds'] = 0
|
||||
if not self.current_item['leech'].isdigit():
|
||||
self.current_item['leech'] = 0
|
||||
if tag == "tr":
|
||||
prettyPrinter(self.current_item)
|
||||
self.results.append('a')
|
||||
self.current_item = None
|
||||
|
||||
def search(self, what, cat='all'):
|
||||
ret = []
|
||||
i = 1
|
||||
while True and i<11:
|
||||
results = []
|
||||
parser = self.SimpleSGMLParser(results, self.url)
|
||||
dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
|
||||
results_re = re.compile('(?s)<table class="tl"><thead>.*')
|
||||
for match in results_re.finditer(dat):
|
||||
res_tab = match.group(0)
|
||||
parser.feed(res_tab)
|
||||
def search(self, what, cat="all"):
|
||||
""" Performs search """
|
||||
connection = http("extratorrent.cc")
|
||||
|
||||
query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
|
||||
|
||||
connection.request("GET", query)
|
||||
response = connection.getresponse()
|
||||
if response.status != 200:
|
||||
return
|
||||
|
||||
list_searches = []
|
||||
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
|
||||
parser.feed(response.read().decode('utf-8'))
|
||||
parser.close()
|
||||
break
|
||||
if len(results) <= 0:
|
||||
break
|
||||
i += 1
|
||||
|
||||
for search_query in list_searches:
|
||||
connection.request("GET", search_query)
|
||||
response = connection.getresponse()
|
||||
parser.feed(response.read().decode('utf-8'))
|
||||
parser.close()
|
||||
|
||||
connection.close()
|
||||
return
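The rewritten search() above trades per-page retrieve_url() calls for a single persistent HTTP connection: the first result page is fetched and parsed, the parser stores the pager links it finds in list_searches, and those pages are then requested over the same connection. A rough, stand-alone sketch of that flow (PagerCollector and the '/search/' prefix are illustrative stand-ins, shown with Python 3 module names; the nova copy uses the Python 2 HTMLParser/httplib imports):

from html.parser import HTMLParser
from http.client import HTTPConnection

class PagerCollector(HTMLParser):
    # Illustrative parser: records the href of every pager link it sees.
    def __init__(self, pages):
        HTMLParser.__init__(self)
        self.pages = pages

    def handle_starttag(self, tag, attrs):
        params = dict(attrs)
        if tag == "a" and params.get("href", "").startswith("/search/"):
            self.pages.append(params["href"])

def fetch_all_pages(host, first_query):
    connection = HTTPConnection(host)
    pages = []
    parser = PagerCollector(pages)

    connection.request("GET", first_query)
    response = connection.getresponse()
    if response.status != 200:
        connection.close()
        return
    parser.feed(response.read().decode("utf-8"))

    for page in pages:  # follow-up result pages reuse the same connection
        connection.request("GET", page)
        connection.getresponse().read()  # the engine feeds this to its parser
    connection.close()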
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 1.02
|
||||
#VERSION: 1.03
|
||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
|
@ -36,10 +36,6 @@ class legittorrents(object):
|
|||
name = 'legittorrents'
|
||||
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleSGMLParser(self.results, self.url)
|
||||
|
||||
def download_torrent(self, info):
|
||||
print download_file(info)
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 1.51
|
||||
#VERSION: 2.00
|
||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
|
||||
|
||||
|
@ -26,90 +26,123 @@
|
|||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from HTMLParser import HTMLParser
|
||||
from httplib import HTTPConnection as http
|
||||
from novaprinter import prettyPrinter
|
||||
from helpers import retrieve_url, download_file
|
||||
import sgmllib
|
||||
import re
|
||||
from helpers import download_file
|
||||
|
||||
class mininova(object):
|
||||
# Mandatory properties
|
||||
""" Search engine class """
|
||||
url = 'http://www.mininova.org'
|
||||
name = 'Mininova'
|
||||
supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleSGMLParser(self.results, self.url)
|
||||
supported_categories = {'all' : '0',
|
||||
'movies' : '4',
|
||||
'tv' : '8',
|
||||
'music' : '5',
|
||||
'games' : '3',
|
||||
'anime' : '1',
|
||||
'software' : '7',
|
||||
'pictures' : '6',
|
||||
'books' : '2'}
|
||||
|
||||
def download_torrent(self, info):
|
||||
print download_file(info)
|
||||
print(download_file(info))
|
||||
|
||||
class SimpleSGMLParser(sgmllib.SGMLParser):
|
||||
def __init__(self, results, url, *args):
|
||||
sgmllib.SGMLParser.__init__(self)
|
||||
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||
""" Parser class """
|
||||
def __init__(self, list_searches, url):
|
||||
HTMLParser.__init__(self)
|
||||
self.list_searches = list_searches
|
||||
self.url = url
|
||||
self.td_counter = None
|
||||
self.table_results = False
|
||||
self.current_item = None
|
||||
self.results = results
|
||||
self.cur_item_name = None
|
||||
self.next_queries = True
|
||||
|
||||
def start_a(self, attr):
|
||||
params = dict(attr)
|
||||
#print params
|
||||
if params.has_key('href'):
|
||||
if params['href'].startswith("/get/"):
|
||||
self.current_item = {}
|
||||
self.td_counter = 0
|
||||
self.current_item['link']=self.url+params['href'].strip()
|
||||
elif params['href'].startswith("/tor/") and self.current_item is not None:
|
||||
self.current_item['desc_link']=self.url+params['href'].strip()
|
||||
def handle_starttag_tr(self, _):
|
||||
""" Handler of tr start tag """
|
||||
self.current_item = dict()
|
||||
|
||||
def handle_starttag_a(self, attrs):
|
||||
""" Handler of a start tag """
|
||||
params = dict(attrs)
|
||||
link = params["href"]
|
||||
|
||||
if link.startswith("/tor/"):
|
||||
#description
|
||||
self.current_item["desc_link"] = "".join((self.url, link))
|
||||
#get download link from description by id
|
||||
self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
|
||||
self.cur_item_name = "name"
|
||||
self.current_item["name"] = ""
|
||||
elif self.next_queries and link.startswith("/search"):
|
||||
if params["title"].startswith("Page"):
|
||||
self.list_searches.append(link)
|
||||
|
||||
def handle_starttag_td(self, attrs):
|
||||
""" Handler of td start tag """
|
||||
if ("align", "right") in attrs:
|
||||
if not "size" in self.current_item:
|
||||
self.cur_item_name = "size"
|
||||
self.current_item["size"] = ""
|
||||
|
||||
def handle_starttag_span(self, attrs):
|
||||
""" Handler of span start tag """
|
||||
if ("class", "g") in attrs:
|
||||
self.cur_item_name = "seeds"
|
||||
self.current_item["seeds"] = ""
|
||||
elif ("class", "b") in attrs:
|
||||
self.cur_item_name = "leech"
|
||||
self.current_item["leech"] = ""
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
""" Parser's start tag handler """
|
||||
if self.table_results:
|
||||
dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
|
||||
if dispatcher:
|
||||
dispatcher(attrs)
|
||||
|
||||
elif tag == "table":
|
||||
self.table_results = ("class", "maintable") in attrs
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
""" Parser's end tag handler """
|
||||
if tag == "tr" and self.current_item:
|
||||
self.current_item["engine_url"] = self.url
|
||||
prettyPrinter(self.current_item)
|
||||
self.current_item = None
|
||||
elif self.cur_item_name:
|
||||
if tag == "a" or tag == "td":
|
||||
self.cur_item_name = None
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.td_counter == 0:
|
||||
if not self.current_item.has_key('name'):
|
||||
self.current_item['name'] = ''
|
||||
self.current_item['name']+= data
|
||||
elif self.td_counter == 1:
|
||||
if not self.current_item.has_key('size'):
|
||||
self.current_item['size'] = ''
|
||||
self.current_item['size']+= data.strip()
|
||||
elif self.td_counter == 2:
|
||||
if not self.current_item.has_key('seeds'):
|
||||
self.current_item['seeds'] = ''
|
||||
self.current_item['seeds']+= data.strip()
|
||||
elif self.td_counter == 3:
|
||||
if not self.current_item.has_key('leech'):
|
||||
self.current_item['leech'] = ''
|
||||
self.current_item['leech']+= data.strip()
|
||||
""" Parser's data handler """
|
||||
if self.cur_item_name:
|
||||
temp = self.current_item[self.cur_item_name]
|
||||
self.current_item[self.cur_item_name] = " ".join((temp, data))
|
||||
|
||||
def start_td(self,attr):
|
||||
if isinstance(self.td_counter,int):
|
||||
self.td_counter += 1
|
||||
if self.td_counter > 4:
|
||||
self.td_counter = None
|
||||
# Display item
|
||||
if self.current_item:
|
||||
self.current_item['engine_url'] = self.url
|
||||
if not self.current_item['seeds'].isdigit():
|
||||
self.current_item['seeds'] = 0
|
||||
if not self.current_item['leech'].isdigit():
|
||||
self.current_item['leech'] = 0
|
||||
prettyPrinter(self.current_item)
|
||||
self.results.append('a')
|
||||
def search(self, what, cat="all"):
|
||||
""" Performs search """
|
||||
connection = http("www.mininova.org")
|
||||
|
||||
def search(self, what, cat='all'):
|
||||
ret = []
|
||||
i = 1
|
||||
while True and i<11:
|
||||
results = []
|
||||
parser = self.SimpleSGMLParser(results, self.url)
|
||||
dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
|
||||
results_re = re.compile('(?s)<h1>Search results for.*')
|
||||
for match in results_re.finditer(dat):
|
||||
res_tab = match.group(0)
|
||||
parser.feed(res_tab)
|
||||
query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
|
||||
|
||||
connection.request("GET", query)
|
||||
response = connection.getresponse()
|
||||
if response.status != 200:
|
||||
return
|
||||
|
||||
list_searches = []
|
||||
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
|
||||
parser.feed(response.read().decode('utf-8'))
|
||||
parser.close()
|
||||
break
|
||||
if len(results) <= 0:
|
||||
break
|
||||
i += 1
|
||||
|
||||
parser.next_queries = False
|
||||
for search_query in list_searches:
|
||||
connection.request("GET", search_query)
|
||||
response = connection.getresponse()
|
||||
parser.feed(response.read().decode('utf-8'))
|
||||
parser.close()
|
||||
|
||||
connection.close()
|
||||
return
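The mininova parser above routes start tags through small per-tag methods: handle_starttag() builds the method name handle_starttag_<tag>, looks it up with getattr(), and calls it when present. A stripped-down sketch of that dispatch idiom (the handlers and sample markup are placeholders; Python 3 module path shown):

from html.parser import HTMLParser

class DispatchingParser(HTMLParser):
    # Toy parser demonstrating the handle_starttag_<tag> dispatch pattern.
    def handle_starttag(self, tag, attrs):
        handler = getattr(self, "_".join(("handle_starttag", tag)), None)
        if handler:
            handler(attrs)

    def handle_starttag_a(self, attrs):
        print("link: %s" % dict(attrs).get("href"))

    def handle_starttag_td(self, attrs):
        print("cell attributes: %s" % attrs)

if __name__ == "__main__":
    DispatchingParser().feed('<tr><td align="right"><a href="/tor/1">name</a></td></tr>')

Compared with a chain of if/elif tests on the tag name, the lookup keeps each tag's logic in its own method and makes unknown tags a cheap no-op.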
|
||||
|
|
Binary file not shown. Size: 252 B before, 951 B after.
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 1.33
|
||||
#VERSION: 1.36
|
||||
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
|
||||
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
# Bruno Barbieri (brunorex@gmail.com)
|
||||
|
@ -28,19 +28,18 @@
|
|||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from novaprinter import prettyPrinter
|
||||
from helpers import retrieve_url, download_file
|
||||
from urllib2 import HTTPError
|
||||
from HTMLParser import HTMLParser
|
||||
from helpers import download_file, retrieve_url
|
||||
import urllib
|
||||
import re
|
||||
from HTMLParser import HTMLParser
|
||||
from re import compile as re_compile
|
||||
|
||||
class torrentreactor(object):
|
||||
url = 'http://www.torrentreactor.net'
|
||||
name = 'TorrentReactor.Net'
|
||||
name = 'TorrentReactor'
|
||||
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
|
||||
|
||||
def download_torrent(self, info):
|
||||
print download_file(info)
|
||||
print(download_file(info))
|
||||
|
||||
class SimpleHTMLParser(HTMLParser):
|
||||
def __init__(self, results, url, *args):
|
||||
|
@ -50,6 +49,7 @@ class torrentreactor(object):
|
|||
self.results = results
|
||||
self.id = None
|
||||
self.url = url
|
||||
self.torrents_matcher = re_compile("/torrents/\d+.*")
|
||||
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
|
@ -58,7 +58,7 @@ class torrentreactor(object):
|
|||
|
||||
def start_a(self, attr):
|
||||
params = dict(attr)
|
||||
if re.match("/torrents/\d+.*", params['href']):
|
||||
if self.torrents_matcher.match(params['href']):
|
||||
self.current_item = {}
|
||||
self.current_item['desc_link'] = self.url+params['href'].strip()
|
||||
elif 'torrentreactor.net/download.php' in params['href']:
|
||||
|
@ -68,15 +68,15 @@ class torrentreactor(object):
|
|||
|
||||
def handle_data(self, data):
|
||||
if self.td_counter == 1:
|
||||
if not self.current_item.has_key('size'):
|
||||
if 'size' not in self.current_item:
|
||||
self.current_item['size'] = ''
|
||||
self.current_item['size']+= data.strip()
|
||||
elif self.td_counter == 2:
|
||||
if not self.current_item.has_key('seeds'):
|
||||
if 'seeds' not in self.current_item:
|
||||
self.current_item['seeds'] = ''
|
||||
self.current_item['seeds']+= data.strip()
|
||||
elif self.td_counter == 3:
|
||||
if not self.current_item.has_key('leech'):
|
||||
if 'leech' not in self.current_item:
|
||||
self.current_item['leech'] = ''
|
||||
self.current_item['leech']+= data.strip()
|
||||
|
||||
|
@ -96,22 +96,14 @@ class torrentreactor(object):
|
|||
self.has_results = True
|
||||
self.results.append('a')
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleHTMLParser(self.results, self.url)
|
||||
|
||||
def search(self, what, cat='all'):
|
||||
i = 0
|
||||
dat = ''
|
||||
while True and i<11:
|
||||
|
||||
while i < 11:
|
||||
results = []
|
||||
parser = self.SimpleHTMLParser(results, self.url)
|
||||
|
||||
try:
|
||||
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
|
||||
except HTTPError:
|
||||
break
|
||||
|
||||
dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
|
||||
parser.feed(dat)
|
||||
parser.close()
|
||||
if len(results) <= 0:
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 2.13
|
||||
#VERSION: 2.14
|
||||
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
|
@ -105,7 +105,7 @@ class torrentz(object):
|
|||
while i < 6:
|
||||
results_list = []
|
||||
# "what" is already urlencoded
|
||||
html = retrieve_url(self.url + '/any?f=%s&p=%d' % (what, i))
|
||||
html = retrieve_url('%s/any?f=%s&p=%d' % (self.url, what, i))
|
||||
parser = self.MyHtmlParser(results_list, self.url, trackers)
|
||||
parser.feed(html)
|
||||
parser.close()
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
torrentreactor: 1.33
|
||||
mininova: 1.51
|
||||
piratebay: 2.11
|
||||
extratorrent: 1.2
|
||||
torrentreactor: 1.36
|
||||
mininova: 2.00
|
||||
piratebay: 2.11
|
||||
extratorrent: 2.0
|
||||
kickasstorrents: 1.26
|
||||
btdigg: 1.24
|
||||
legittorrents: 1.02
|
||||
torrentz: 2.13
|
||||
torrentz: 2.14
|
||||
legittorrents: 1.03
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
#VERSION: 1.32
|
||||
#VERSION: 1.40
|
||||
|
||||
# Author:
|
||||
# Fabien Devaux <fab AT gnux DOT info>
|
||||
|
@ -37,16 +37,15 @@
|
|||
#
|
||||
# Licence: BSD
|
||||
|
||||
import sys
|
||||
import threading
|
||||
import os
|
||||
import glob
|
||||
import urllib
|
||||
|
||||
import fix_encoding
|
||||
from os import path
|
||||
from glob import glob
|
||||
from sys import argv
|
||||
from multiprocessing import Pool, cpu_count
|
||||
from fix_encoding import fix_encoding
|
||||
|
||||
THREADED = True
|
||||
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books')
|
||||
CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
|
||||
|
||||
################################################################################
|
||||
# Every engine should have a "search" method taking
|
||||
|
@ -56,34 +55,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
|
|||
# As a convention, try to list results by decreasing number of seeds or similar
|
||||
################################################################################
|
||||
|
||||
supported_engines = []
|
||||
def initialize_engines():
|
||||
""" Import available engines
|
||||
|
||||
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
|
||||
for engine in engines:
|
||||
e = engine.split(os.sep)[-1][:-3]
|
||||
if len(e.strip()) == 0: continue
|
||||
if e.startswith('_'): continue
|
||||
Return list of available engines
|
||||
"""
|
||||
supported_engines = []
|
||||
|
||||
engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
|
||||
for engine in engines:
|
||||
engi = path.basename(engine).split('.')[0].strip()
|
||||
if len(engi) == 0 or engi.startswith('_'):
|
||||
continue
|
||||
try:
|
||||
exec "from engines.%s import %s"%(e,e)
|
||||
supported_engines.append(e)
|
||||
#import engines.[engine]
|
||||
engine_module = __import__(".".join(("engines", engi)))
|
||||
#get low-level module
|
||||
engine_module = getattr(engine_module, engi)
|
||||
#bind class name
|
||||
globals()[engi] = getattr(engine_module, engi)
|
||||
supported_engines.append(engi)
|
||||
except:
|
||||
pass
|
||||
|
||||
def engineToXml(short_name):
|
||||
xml = "<%s>\n"%short_name
|
||||
exec "engine = %s()"%short_name
|
||||
xml += "<name>%s</name>\n"%engine.name
|
||||
xml += "<url>%s</url>\n"%engine.url
|
||||
xml += "<categories>"
|
||||
if hasattr(engine, 'supported_categories'):
|
||||
supported_categories = engine.supported_categories.keys()
|
||||
supported_categories.remove('all')
|
||||
xml += " ".join(supported_categories)
|
||||
xml += "</categories>\n"
|
||||
xml += "</%s>\n"%short_name
|
||||
return xml
|
||||
return supported_engines
|
||||
|
||||
def displayCapabilities():
|
||||
def engines_to_xml(supported_engines):
|
||||
""" Generates xml for supported engines """
|
||||
tab = " " * 4
|
||||
|
||||
for short_name in supported_engines:
|
||||
search_engine = globals()[short_name]()
|
||||
|
||||
supported_categories = ""
|
||||
if hasattr(search_engine, "supported_categories"):
|
||||
supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
|
||||
if key is not "all"))
|
||||
|
||||
yield "".join((tab, "<", short_name, ">\n",
|
||||
tab, tab, "<name>", search_engine.name, "</name>\n",
|
||||
tab, tab, "<url>", search_engine.url, "</url>\n",
|
||||
tab, tab, "<categories>", supported_categories, "</categories>\n",
|
||||
tab, "</", short_name, ">\n"))
|
||||
|
||||
def displayCapabilities(supported_engines):
|
||||
"""
|
||||
Display capabilities in XML format
|
||||
<capabilities>
|
||||
|
@ -94,70 +109,75 @@ def displayCapabilities():
|
|||
</engine_short_name>
|
||||
</capabilities>
|
||||
"""
|
||||
xml = "<capabilities>"
|
||||
for short_name in supported_engines:
|
||||
xml += engineToXml(short_name)
|
||||
xml += "</capabilities>"
|
||||
print xml
|
||||
xml = "".join(("<capabilities>\n",
|
||||
"".join(engines_to_xml(supported_engines)),
|
||||
"</capabilities>"))
|
||||
print(xml)
|
||||
|
||||
class EngineLauncher(threading.Thread):
|
||||
def __init__(self, engine, what, cat='all'):
|
||||
threading.Thread.__init__(self)
|
||||
self.engine = engine
|
||||
self.what = what
|
||||
self.cat = cat
|
||||
def run(self):
|
||||
if hasattr(self.engine, 'supported_categories'):
|
||||
if self.cat == 'all' or self.cat in self.engine.supported_categories.keys():
|
||||
self.engine.search(self.what, self.cat)
|
||||
elif self.cat == 'all':
|
||||
self.engine.search(self.what)
|
||||
def run_search(engine_list):
|
||||
""" Run search in engine
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Make sure we enforce utf-8 encoding
|
||||
fix_encoding.fix_encoding()
|
||||
@param engine_list List with engine, query and category
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
|
||||
(','.join(supported_engines)))
|
||||
|
||||
if len(sys.argv) == 2:
|
||||
if sys.argv[1] == "--capabilities":
|
||||
displayCapabilities()
|
||||
sys.exit(0)
|
||||
@retval False if any exceptions occurred
|
||||
@retval True otherwise
|
||||
"""
|
||||
engine, what, cat = engine_list
|
||||
try:
|
||||
engine = engine()
|
||||
#avoid exceptions due to invalid category
|
||||
if hasattr(engine, 'supported_categories'):
|
||||
cat = cat if cat in engine.supported_categories else "all"
|
||||
engine.search(what, cat)
|
||||
else:
|
||||
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
|
||||
(','.join(supported_engines)))
|
||||
engine.search(what)
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')]
|
||||
def main(args):
|
||||
fix_encoding()
|
||||
supported_engines = initialize_engines()
|
||||
|
||||
if not args:
|
||||
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
|
||||
"available engines: %s" % (','.join(supported_engines)))
|
||||
|
||||
elif args[0] == "--capabilities":
|
||||
displayCapabilities(supported_engines)
|
||||
return
|
||||
|
||||
elif len(args) < 3:
|
||||
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
|
||||
"available engines: %s" % (','.join(supported_engines)))
|
||||
|
||||
#get only unique engines with set
|
||||
engines_list = set(e.lower() for e in args[0].strip().split(','))
|
||||
|
||||
if 'all' in engines_list:
|
||||
engines_list = supported_engines
|
||||
else:
|
||||
#discard un-supported engines
|
||||
engines_list = [engine for engine in engines_list
|
||||
if engine in supported_engines]
|
||||
|
||||
cat = sys.argv[2].lower()
|
||||
if not engines_list:
|
||||
#engine list is empty. Nothing to do here
|
||||
return
|
||||
|
||||
cat = args[1].lower()
|
||||
|
||||
if cat not in CATEGORIES:
|
||||
raise SystemExit('Invalid category!')
|
||||
raise SystemExit(" - ".join(('Invalid category', cat)))
|
||||
|
||||
what = urllib.quote(' '.join(sys.argv[3:]))
|
||||
what = urllib.quote(' '.join(args[2:]))
|
||||
|
||||
threads = []
|
||||
for engine in engines_list:
|
||||
try:
|
||||
if THREADED:
|
||||
exec "l = EngineLauncher(%s(), what, cat)"%engine
|
||||
threads.append(l)
|
||||
l.start()
|
||||
#number of spawned child processes is capped at min(number of searches, number of CPUs)
|
||||
pool = Pool(min(len(engines_list), cpu_count()))
|
||||
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
|
||||
else:
|
||||
exec "e = %s()"%engine
|
||||
if hasattr(engine, 'supported_categories'):
|
||||
if cat == 'all' or cat in e.supported_categories.keys():
|
||||
e.search(what, cat)
|
||||
elif self.cat == 'all':
|
||||
e.search(what)
|
||||
engine().search(what, cat)
|
||||
except:
|
||||
pass
|
||||
if THREADED:
|
||||
for t in threads:
|
||||
t.join()
|
||||
map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(argv[1:])
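The exec-based import in the old initialize_engines() is replaced above by __import__ plus getattr: importing "engines.<name>" returns the top-level engines package, so the submodule and then the engine class are pulled out with getattr() and bound into globals(). The same mechanics can be seen against a standard-library package (os.path stands in for engines.<engine> here; this only illustrates the import behaviour, it is not the engine code):

top = __import__("os.path")    # returns the top-level package, i.e. the 'os' module
sub = getattr(top, "path")     # the 'os.path' submodule
join = getattr(sub, "join")    # a name defined inside that submodule
print(join("a", "b"))          # -> 'a/b' (or 'a\\b' on Windows)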
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#VERSION: 1.10
|
||||
#VERSION: 1.20
|
||||
|
||||
# Author:
|
||||
# Christophe DUMEZ (chris@qbittorrent.org)
|
||||
|
@ -43,8 +43,8 @@ for engine in engines:
|
|||
if len(e.strip()) == 0: continue
|
||||
if e.startswith('_'): continue
|
||||
try:
|
||||
exec "from engines.%s import %s"%(e,e)
|
||||
exec "engine_url = %s.url"%e
|
||||
exec("from engines.%s import %s"%(e,e))
|
||||
exec("engine_url = %s.url"%e)
|
||||
supported_engines[engine_url] = e
|
||||
except:
|
||||
pass
|
||||
|
@ -54,11 +54,11 @@ if __name__ == '__main__':
|
|||
raise SystemExit('./nova2dl.py engine_url download_parameter')
|
||||
engine_url = sys.argv[1].strip()
|
||||
download_param = sys.argv[2].strip()
|
||||
if engine_url not in supported_engines.keys():
|
||||
if engine_url not in list(supported_engines.keys()):
|
||||
raise SystemExit('./nova2dl.py: this engine_url was not recognized')
|
||||
exec "engine = %s()"%supported_engines[engine_url]
|
||||
exec("engine = %s()"%supported_engines[engine_url])
|
||||
if hasattr(engine, 'download_torrent'):
|
||||
engine.download_torrent(download_param)
|
||||
else:
|
||||
print download_file(download_param)
|
||||
print(download_file(download_param))
|
||||
sys.exit(0)
|
|
@ -25,20 +25,19 @@
|
|||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys, codecs
|
||||
from io import open
|
||||
|
||||
# Force UTF-8 printing
|
||||
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
|
||||
|
||||
def prettyPrinter(dictionary):
|
||||
# Convert everything to unicode for safe printing
|
||||
for key,value in dictionary.items():
|
||||
if isinstance(dictionary[key], str):
|
||||
dictionary[key] = unicode(dictionary[key], 'utf-8')
|
||||
dictionary['size'] = anySizeToBytes(dictionary['size'])
|
||||
if dictionary.has_key('desc_link'):
|
||||
print u"%s|%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link'])
|
||||
else:
|
||||
print u"%s|%s|%s|%s|%s|%s"%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'])
|
||||
outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"]))
|
||||
if 'desc_link' in dictionary:
|
||||
outtext = "|".join((outtext, dictionary["desc_link"]))
|
||||
|
||||
with open(1, 'w', encoding='utf-8', closefd=False) as utf8_stdout:
|
||||
utf8_stdout.write(unicode("".join((outtext, "\n"))))
|
||||
|
||||
def anySizeToBytes(size_string):
|
||||
"""
|
||||
|
@ -63,7 +62,7 @@ def anySizeToBytes(size_string):
|
|||
short_unit = unit.upper()[0]
|
||||
|
||||
# convert
|
||||
units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 }
|
||||
if units_dict.has_key( short_unit ):
|
||||
units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10}
|
||||
if units_dict.has_key(short_unit):
|
||||
size = size * 2**units_dict[short_unit]
|
||||
return int(size)
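For reference, the units_dict lookup above applies binary prefixes, so a parsed size such as 1.5 GB becomes 1.5 * 2**30 bytes. A small stand-alone illustration of that conversion (simplified: the real anySizeToBytes first splits the number and the unit out of the input string):

def size_to_bytes(value, unit):
    # Same conversion as the units_dict branch of anySizeToBytes.
    units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10}
    short_unit = unit.upper()[0]
    if short_unit in units_dict:
        value = value * 2 ** units_dict[short_unit]
    return int(value)

print(size_to_bytes(1.5, "GB"))   # -> 1610612736
print(size_to_bytes(700, "MB"))   # -> 734003200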
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 1.2
|
||||
#VERSION: 2.0
|
||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,92 +25,139 @@
|
|||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
from html.parser import HTMLParser
|
||||
from http.client import HTTPConnection as http
|
||||
#qBt
|
||||
from novaprinter import prettyPrinter
|
||||
from helpers import retrieve_url, download_file
|
||||
import sgmllib3
|
||||
import re
|
||||
from helpers import download_file
|
||||
|
||||
class extratorrent(object):
|
||||
""" Search engine class """
|
||||
url = 'http://extratorrent.cc'
|
||||
name = 'extratorrent'
|
||||
supported_categories = {'all': '', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'books': '2', 'pictures': '6'}
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleSGMLParser(self.results, self.url)
|
||||
name = 'ExtraTorrent'
|
||||
supported_categories = {'all' : '0',
|
||||
'movies' : '4',
|
||||
'tv' : '8',
|
||||
'music' : '5',
|
||||
'games' : '3',
|
||||
'anime' : '1',
|
||||
'software' : '7',
|
||||
'books' : '2',
|
||||
'pictures' : '6'}
|
||||
|
||||
def download_torrent(self, info):
|
||||
""" Downloader """
|
||||
print(download_file(info))
|
||||
|
||||
class SimpleSGMLParser(sgmllib3.SGMLParser):
|
||||
def __init__(self, results, url, *args):
|
||||
sgmllib3.SGMLParser.__init__(self)
|
||||
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||
""" Parser class """
|
||||
def __init__(self, list_searches, url):
|
||||
HTMLParser.__init__(self)
|
||||
self.url = url
|
||||
self.td_counter = None
|
||||
self.list_searches = list_searches
|
||||
self.current_item = None
|
||||
self.start_name = False
|
||||
self.results = results
|
||||
self.cur_item_name = None
|
||||
self.pending_size = False
|
||||
self.next_queries = True
|
||||
self.pending_next_queries = False
|
||||
self.next_queries_set = set()
|
||||
|
||||
def start_a(self, attr):
|
||||
params = dict(attr)
|
||||
#print params
|
||||
if 'href' in params and params['href'].startswith("/torrent_download/"):
|
||||
self.current_item = {}
|
||||
self.td_counter = 0
|
||||
self.start_name = False
|
||||
torrent_id = '/'.join(params['href'].split('/')[2:])
|
||||
self.current_item['link']=self.url+'/download/'+torrent_id
|
||||
elif 'href' in params and params['href'].startswith("/torrent/") and params['href'].endswith(".html"):
|
||||
self.current_item['desc_link'] = self.url + params['href'].strip()
|
||||
self.start_name = True
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if self.current_item:
|
||||
if tag == "a":
|
||||
params = dict(attrs)
|
||||
link = params['href']
|
||||
|
||||
if not link.startswith("/torrent"):
|
||||
return
|
||||
|
||||
if link[8] == "/":
|
||||
#description
|
||||
self.current_item["desc_link"] = "".join((self.url, link))
|
||||
#strip the leading "view " prefix from the title
|
||||
self.current_item["name"] = params["title"][5:].replace("&", "&")
|
||||
self.pending_size = True
|
||||
elif link[8] == "_":
|
||||
#download link
|
||||
link = link.replace("torrent_", "", 1)
|
||||
self.current_item["link"] = "".join((self.url, link))
|
||||
|
||||
elif tag == "td":
|
||||
if self.pending_size:
|
||||
self.cur_item_name = "size"
|
||||
self.current_item["size"] = ""
|
||||
self.pending_size = False
|
||||
|
||||
for attr in attrs:
|
||||
if attr[0] == "class":
|
||||
if attr[1][0] == "s":
|
||||
self.cur_item_name = "seeds"
|
||||
self.current_item["seeds"] = ""
|
||||
elif attr[1][0] == "l":
|
||||
self.cur_item_name = "leech"
|
||||
self.current_item["leech"] = ""
|
||||
break
|
||||
|
||||
|
||||
elif tag == "tr":
|
||||
for attr in attrs:
|
||||
if attr[0] == "class" and attr[1].startswith("tl"):
|
||||
self.current_item = dict()
|
||||
self.current_item["engine_url"] = self.url
|
||||
break
|
||||
|
||||
elif self.pending_next_queries:
|
||||
if tag == "a":
|
||||
params = dict(attrs)
|
||||
if params["title"] in self.next_queries_set:
|
||||
return
|
||||
self.list_searches.append(params['href'])
|
||||
self.next_queries_set.add(params["title"])
|
||||
if params["title"] == "10":
|
||||
self.pending_next_queries = False
|
||||
else:
|
||||
self.pending_next_queries = False
|
||||
|
||||
elif self.next_queries:
|
||||
if tag == "b" and ("class", "pager_no_link") in attrs:
|
||||
self.next_queries = False
|
||||
self.pending_next_queries = True
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.td_counter == 2:
|
||||
if 'name' not in self.current_item and self.start_name:
|
||||
self.current_item['name'] = data.strip()
|
||||
elif self.td_counter == 3:
|
||||
if 'size' not in self.current_item:
|
||||
self.current_item['size'] = ''
|
||||
self.current_item['size']+= data.replace(" ", " ").strip()
|
||||
elif self.td_counter == 4:
|
||||
if 'seeds' not in self.current_item:
|
||||
self.current_item['seeds'] = ''
|
||||
self.current_item['seeds']+= data.strip()
|
||||
elif self.td_counter == 5:
|
||||
if 'leech' not in self.current_item:
|
||||
self.current_item['leech'] = ''
|
||||
self.current_item['leech']+= data.strip()
|
||||
if self.cur_item_name:
|
||||
temp = self.current_item[self.cur_item_name]
|
||||
self.current_item[self.cur_item_name] = " ".join((temp, data))
|
||||
#handle_data can be called more than once for the size value (e.g. around a space), so keep cur_item_name set until the size field is complete
|
||||
if not self.cur_item_name == "size":
|
||||
self.cur_item_name = None
|
||||
|
||||
def start_td(self,attr):
|
||||
if isinstance(self.td_counter,int):
|
||||
self.td_counter += 1
|
||||
if self.td_counter > 5:
|
||||
self.td_counter = None
|
||||
# Display item
|
||||
def handle_endtag(self, tag):
|
||||
if self.current_item:
|
||||
self.current_item['engine_url'] = self.url
|
||||
if not self.current_item['seeds'].isdigit():
|
||||
self.current_item['seeds'] = 0
|
||||
if not self.current_item['leech'].isdigit():
|
||||
self.current_item['leech'] = 0
|
||||
if tag == "tr":
|
||||
prettyPrinter(self.current_item)
|
||||
self.results.append('a')
|
||||
self.current_item = None
|
||||
|
||||
def search(self, what, cat='all'):
|
||||
ret = []
|
||||
i = 1
|
||||
while True and i<11:
|
||||
results = []
|
||||
parser = self.SimpleSGMLParser(results, self.url)
|
||||
dat = retrieve_url(self.url+'/advanced_search/?with=%s&s_cat=%s&page=%d'%(what, self.supported_categories[cat], i))
|
||||
results_re = re.compile('(?s)<table class="tl"><thead>.*')
|
||||
for match in results_re.finditer(dat):
|
||||
res_tab = match.group(0)
|
||||
parser.feed(res_tab)
|
||||
def search(self, what, cat="all"):
|
||||
""" Performs search """
|
||||
connection = http("extratorrent.cc")
|
||||
|
||||
query = "".join(("/search/?new=1&search=", what, "&s_cat=", self.supported_categories[cat]))
|
||||
|
||||
connection.request("GET", query)
|
||||
response = connection.getresponse()
|
||||
if response.status != 200:
|
||||
return
|
||||
|
||||
list_searches = []
|
||||
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
|
||||
parser.feed(response.read().decode('utf-8'))
|
||||
parser.close()
|
||||
break
|
||||
if len(results) <= 0:
|
||||
break
|
||||
i += 1
|
||||
|
||||
for search_query in list_searches:
|
||||
connection.request("GET", search_query)
|
||||
response = connection.getresponse()
|
||||
parser.feed(response.read().decode('utf-8'))
|
||||
parser.close()
|
||||
|
||||
connection.close()
|
||||
return
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 1.03
|
||||
#VERSION: 1.04
|
||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
|
@ -36,10 +36,6 @@ class legittorrents(object):
|
|||
name = 'legittorrents'
|
||||
supported_categories = {'all': '', 'movies': '1', 'tv': '13', 'music': '2', 'games': '3', 'anime': '5', 'books': '6'}
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleSGMLParser(self.results, self.url)
|
||||
|
||||
def download_torrent(self, info):
|
||||
print(download_file(info))
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 1.51
|
||||
#VERSION: 2.00
|
||||
#AUTHORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
#CONTRIBUTORS: Diego de las Heras (diegodelasheras@gmail.com)
|
||||
|
||||
|
@ -26,90 +26,123 @@
|
|||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from html.parser import HTMLParser
|
||||
from http.client import HTTPConnection as http
|
||||
from novaprinter import prettyPrinter
|
||||
from helpers import retrieve_url, download_file
|
||||
import sgmllib3
|
||||
import re
|
||||
from helpers import download_file
|
||||
|
||||
class mininova(object):
|
||||
# Mandatory properties
|
||||
""" Search engine class """
|
||||
url = 'http://www.mininova.org'
|
||||
name = 'Mininova'
|
||||
supported_categories = {'all': '0', 'movies': '4', 'tv': '8', 'music': '5', 'games': '3', 'anime': '1', 'software': '7', 'pictures': '6', 'books': '2'}
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleSGMLParser(self.results, self.url)
|
||||
supported_categories = {'all' : '0',
|
||||
'movies' : '4',
|
||||
'tv' : '8',
|
||||
'music' : '5',
|
||||
'games' : '3',
|
||||
'anime' : '1',
|
||||
'software' : '7',
|
||||
'pictures' : '6',
|
||||
'books' : '2'}
|
||||
|
||||
def download_torrent(self, info):
|
||||
print(download_file(info))
|
||||
|
||||
class SimpleSGMLParser(sgmllib3.SGMLParser):
|
||||
def __init__(self, results, url, *args):
|
||||
sgmllib3.SGMLParser.__init__(self)
|
||||
class MyHtmlParseWithBlackJack(HTMLParser):
|
||||
""" Parser class """
|
||||
def __init__(self, list_searches, url):
|
||||
HTMLParser.__init__(self)
|
||||
self.list_searches = list_searches
|
||||
self.url = url
|
||||
self.td_counter = None
|
||||
self.table_results = False
|
||||
self.current_item = None
|
||||
self.results = results
|
||||
self.cur_item_name = None
|
||||
self.next_queries = True
|
||||
|
||||
def start_a(self, attr):
|
||||
params = dict(attr)
|
||||
#print params
|
||||
if 'href' in params:
|
||||
if params['href'].startswith("/get/"):
|
||||
self.current_item = {}
|
||||
self.td_counter = 0
|
||||
self.current_item['link']=self.url+params['href'].strip()
|
||||
elif params['href'].startswith("/tor/") and self.current_item is not None:
|
||||
self.current_item['desc_link']=self.url+params['href'].strip()
|
||||
def handle_starttag_tr(self, _):
|
||||
""" Handler of tr start tag """
|
||||
self.current_item = dict()
|
||||
|
||||
def handle_starttag_a(self, attrs):
|
||||
""" Handler of a start tag """
|
||||
params = dict(attrs)
|
||||
link = params["href"]
|
||||
|
||||
if link.startswith("/tor/"):
|
||||
#description
|
||||
self.current_item["desc_link"] = "".join((self.url, link))
|
||||
#get download link from description by id
|
||||
self.current_item["link"] = "".join((self.url, "/get/", link[5:-2]))
|
||||
self.cur_item_name = "name"
|
||||
self.current_item["name"] = ""
|
||||
elif self.next_queries and link.startswith("/search"):
|
||||
if params["title"].startswith("Page"):
|
||||
self.list_searches.append(link)
|
||||
|
||||
def handle_starttag_td(self, attrs):
|
||||
""" Handler of td start tag """
|
||||
if ("align", "right") in attrs:
|
||||
if not "size" in self.current_item:
|
||||
self.cur_item_name = "size"
|
||||
self.current_item["size"] = ""
|
||||
|
||||
def handle_starttag_span(self, attrs):
|
||||
""" Handler of span start tag """
|
||||
if ("class", "g") in attrs:
|
||||
self.cur_item_name = "seeds"
|
||||
self.current_item["seeds"] = ""
|
||||
elif ("class", "b") in attrs:
|
||||
self.cur_item_name = "leech"
|
||||
self.current_item["leech"] = ""
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
""" Parser's start tag handler """
|
||||
if self.table_results:
|
||||
dispatcher = getattr(self, "_".join(("handle_starttag", tag)), None)
|
||||
if dispatcher:
|
||||
dispatcher(attrs)
|
||||
|
||||
elif tag == "table":
|
||||
self.table_results = ("class", "maintable") in attrs
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
""" Parser's end tag handler """
|
||||
if tag == "tr" and self.current_item:
|
||||
self.current_item["engine_url"] = self.url
|
||||
prettyPrinter(self.current_item)
|
||||
self.current_item = None
|
||||
elif self.cur_item_name:
|
||||
if tag == "a" or tag == "td":
|
||||
self.cur_item_name = None
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.td_counter == 0:
|
||||
if 'name' not in self.current_item:
|
||||
self.current_item['name'] = ''
|
||||
self.current_item['name']+= data
|
||||
elif self.td_counter == 1:
|
||||
if 'size' not in self.current_item:
|
||||
self.current_item['size'] = ''
|
||||
self.current_item['size']+= data.strip()
|
||||
elif self.td_counter == 2:
|
||||
if 'seeds' not in self.current_item:
|
||||
self.current_item['seeds'] = ''
|
||||
self.current_item['seeds']+= data.strip()
|
||||
elif self.td_counter == 3:
|
||||
if 'leech' not in self.current_item:
|
||||
self.current_item['leech'] = ''
|
||||
self.current_item['leech']+= data.strip()
|
||||
""" Parser's data handler """
|
||||
if self.cur_item_name:
|
||||
temp = self.current_item[self.cur_item_name]
|
||||
self.current_item[self.cur_item_name] = " ".join((temp, data))
|
||||
|
||||
def start_td(self,attr):
|
||||
if isinstance(self.td_counter,int):
|
||||
self.td_counter += 1
|
||||
if self.td_counter > 4:
|
||||
self.td_counter = None
|
||||
# Display item
|
||||
if self.current_item:
|
||||
self.current_item['engine_url'] = self.url
|
||||
if not self.current_item['seeds'].isdigit():
|
||||
self.current_item['seeds'] = 0
|
||||
if not self.current_item['leech'].isdigit():
|
||||
self.current_item['leech'] = 0
|
||||
prettyPrinter(self.current_item)
|
||||
self.results.append('a')
|
||||
def search(self, what, cat="all"):
|
||||
""" Performs search """
|
||||
connection = http("www.mininova.org")
|
||||
|
||||
def search(self, what, cat='all'):
|
||||
ret = []
|
||||
i = 1
|
||||
while True and i<11:
|
||||
results = []
|
||||
parser = self.SimpleSGMLParser(results, self.url)
|
||||
dat = retrieve_url(self.url+'/search/%s/%s/seeds/%d'%(what, self.supported_categories[cat], i))
|
||||
results_re = re.compile('(?s)<h1>Search results for.*')
|
||||
for match in results_re.finditer(dat):
|
||||
res_tab = match.group(0)
|
||||
parser.feed(res_tab)
|
||||
query = "/".join(("/search", what, self.supported_categories[cat], "seeds"))
|
||||
|
||||
connection.request("GET", query)
|
||||
response = connection.getresponse()
|
||||
if response.status != 200:
|
||||
return
|
||||
|
||||
list_searches = []
|
||||
parser = self.MyHtmlParseWithBlackJack(list_searches, self.url)
|
||||
parser.feed(response.read().decode('utf-8'))
|
||||
parser.close()
|
||||
break
|
||||
if len(results) <= 0:
|
||||
break
|
||||
i += 1
|
||||
|
||||
parser.next_queries = False
|
||||
for search_query in list_searches:
|
||||
connection.request("GET", search_query)
|
||||
response = connection.getresponse()
|
||||
parser.feed(response.read().decode('utf-8'))
|
||||
parser.close()
|
||||
|
||||
connection.close()
|
||||
return
|
||||
|
|
Binary file not shown. Size: 252 B before, 951 B after.
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 1.33
|
||||
#VERSION: 1.36
|
||||
#AUTHORS: Gekko Dam Beer (gekko04@users.sourceforge.net)
|
||||
#CONTRIBUTORS: Christophe Dumez (chris@qbittorrent.org)
|
||||
# Bruno Barbieri (brunorex@gmail.com)
|
||||
|
@ -28,14 +28,14 @@
|
|||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from novaprinter import prettyPrinter
|
||||
from helpers import retrieve_url, download_file
|
||||
from urllib import error, parse
|
||||
from helpers import download_file, retrieve_url
|
||||
from urllib import parse
|
||||
from html.parser import HTMLParser
|
||||
import re
|
||||
from re import compile as re_compile
|
||||
|
||||
class torrentreactor(object):
|
||||
url = 'http://www.torrentreactor.net'
|
||||
name = 'TorrentReactor.Net'
|
||||
name = 'TorrentReactor'
|
||||
supported_categories = {'all': '', 'movies': '5', 'tv': '8', 'music': '6', 'games': '3', 'anime': '1', 'software': '2'}
|
||||
|
||||
def download_torrent(self, info):
|
||||
|
@ -49,6 +49,7 @@ class torrentreactor(object):
|
|||
self.results = results
|
||||
self.id = None
|
||||
self.url = url
|
||||
self.torrents_matcher = re_compile("/torrents/\d+.*")
|
||||
self.dispatcher = { 'a' : self.start_a, 'td' : self.start_td }
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
|
@ -57,7 +58,7 @@ class torrentreactor(object):
|
|||
|
||||
def start_a(self, attr):
|
||||
params = dict(attr)
|
||||
if re.match("/torrents/\d+.*", params['href']):
|
||||
if self.torrents_matcher.match(params['href']):
|
||||
self.current_item = {}
|
||||
self.current_item['desc_link'] = self.url+params['href'].strip()
|
||||
elif 'torrentreactor.net/download.php' in params['href']:
|
||||
|
@ -95,22 +96,14 @@ class torrentreactor(object):
|
|||
self.has_results = True
|
||||
self.results.append('a')
|
||||
|
||||
def __init__(self):
|
||||
self.results = []
|
||||
self.parser = self.SimpleHTMLParser(self.results, self.url)
|
||||
|
||||
def search(self, what, cat='all'):
|
||||
i = 0
|
||||
dat = ''
|
||||
while True and i<11:
|
||||
|
||||
while i < 11:
|
||||
results = []
|
||||
parser = self.SimpleHTMLParser(results, self.url)
|
||||
|
||||
try:
|
||||
dat = retrieve_url(self.url+'/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(what, (i*35), self.supported_categories[cat]))
|
||||
except error.HTTPError:
|
||||
break
|
||||
|
||||
dat = retrieve_url('%s/torrent-search/%s/%s?sort=seeders.desc&type=all&period=none&categories=%s'%(self.url, what, (i*35), self.supported_categories[cat]))
|
||||
parser.feed(dat)
|
||||
parser.close()
|
||||
if len(results) <= 0:
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#VERSION: 2.13
|
||||
#VERSION: 2.14
|
||||
#AUTHORS: Diego de las Heras (diegodelasheras@gmail.com)
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
torrentreactor: 1.33
|
||||
mininova: 1.51
|
||||
piratebay: 2.11
|
||||
extratorrent: 1.2
|
||||
torrentreactor: 1.36
|
||||
mininova: 2.00
|
||||
piratebay: 2.11
|
||||
extratorrent: 2.0
|
||||
kickasstorrents: 1.26
|
||||
btdigg: 1.23
|
||||
legittorrents: 1.03
|
||||
torrentz: 2.13
|
||||
torrentz: 2.14
|
||||
legittorrents: 1.04
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
#VERSION: 1.24
|
||||
#VERSION: 1.40
|
||||
|
||||
# Author:
|
||||
# Fabien Devaux <fab AT gnux DOT info>
|
||||
|
@ -37,14 +37,14 @@
|
|||
#
|
||||
# Licence: BSD
|
||||
|
||||
import sys
|
||||
import threading
|
||||
import os
|
||||
import glob
|
||||
import urllib.parse
|
||||
from os import path, cpu_count
|
||||
from glob import glob
|
||||
from sys import argv
|
||||
from multiprocessing import Pool
|
||||
|
||||
THREADED = True
|
||||
CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books')
|
||||
CATEGORIES = {'all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pictures', 'books'}
|
||||
|
||||
################################################################################
|
||||
# Every engine should have a "search" method taking
|
||||
|
@ -54,34 +54,50 @@ CATEGORIES = ('all', 'movies', 'tv', 'music', 'games', 'anime', 'software', 'pic
|
|||
# As a convention, try to list results by decreasing number of seeds or similar
|
||||
################################################################################
|
||||
|
||||
supported_engines = []
|
||||
def initialize_engines():
|
||||
""" Import available engines
|
||||
|
||||
engines = glob.glob(os.path.join(os.path.dirname(__file__), 'engines','*.py'))
|
||||
for engine in engines:
|
||||
e = engine.split(os.sep)[-1][:-3]
|
||||
if len(e.strip()) == 0: continue
|
||||
if e.startswith('_'): continue
|
||||
Return list of available engines
|
||||
"""
|
||||
supported_engines = []
|
||||
|
||||
engines = glob(path.join(path.dirname(__file__), 'engines', '*.py'))
|
||||
for engine in engines:
|
||||
engi = path.basename(engine).split('.')[0].strip()
|
||||
if len(engi) == 0 or engi.startswith('_'):
|
||||
continue
|
||||
try:
|
||||
exec("from engines.%s import %s"%(e,e))
|
||||
supported_engines.append(e)
|
||||
#import engines.[engine]
|
||||
engine_module = __import__(".".join(("engines", engi)))
|
||||
#get low-level module
|
||||
engine_module = getattr(engine_module, engi)
|
||||
#bind class name
|
||||
globals()[engi] = getattr(engine_module, engi)
|
||||
supported_engines.append(engi)
|
||||
except:
|
||||
pass
|
||||
|
||||
def engineToXml(short_name):
|
||||
xml = "<%s>\n"%short_name
|
||||
exec("search_engine = %s()"%short_name, globals())
|
||||
xml += "<name>%s</name>\n"%search_engine.name
|
||||
xml += "<url>%s</url>\n"%search_engine.url
|
||||
xml += "<categories>"
|
||||
if hasattr(search_engine, 'supported_categories'):
|
||||
supported_categories = list(search_engine.supported_categories.keys())
|
||||
supported_categories.remove('all')
|
||||
xml += " ".join(supported_categories)
|
||||
xml += "</categories>\n"
|
||||
xml += "</%s>\n"%short_name
|
||||
return xml
|
||||
return supported_engines
|
||||
|
||||
def displayCapabilities():
|
||||
def engines_to_xml(supported_engines):
|
||||
""" Generates xml for supported engines """
|
||||
tab = " " * 4
|
||||
|
||||
for short_name in supported_engines:
|
||||
search_engine = globals()[short_name]()
|
||||
|
||||
supported_categories = ""
|
||||
if hasattr(search_engine, "supported_categories"):
|
||||
supported_categories = " ".join((key for key in search_engine.supported_categories.keys()
|
||||
if key is not "all"))
|
||||
|
||||
yield "".join((tab, "<", short_name, ">\n",
|
||||
tab, tab, "<name>", search_engine.name, "</name>\n",
|
||||
tab, tab, "<url>", search_engine.url, "</url>\n",
|
||||
tab, tab, "<categories>", supported_categories, "</categories>\n",
|
||||
tab, "</", short_name, ">\n"))
|
||||
|
||||
def displayCapabilities(supported_engines):
|
||||
"""
|
||||
Display capabilities in XML format
|
||||
<capabilities>
|
||||
|
@ -92,67 +108,75 @@ def displayCapabilities():
|
|||
</engine_short_name>
|
||||
</capabilities>
|
||||
"""
|
||||
xml = "<capabilities>"
|
||||
for short_name in supported_engines:
|
||||
xml += engineToXml(short_name)
|
||||
xml += "</capabilities>"
|
||||
xml = "".join(("<capabilities>\n",
|
||||
"".join(engines_to_xml(supported_engines)),
|
||||
"</capabilities>"))
|
||||
print(xml)
|
||||
|
||||
class EngineLauncher(threading.Thread):
|
||||
def __init__(self, engine, what, cat='all'):
|
||||
threading.Thread.__init__(self)
|
||||
self.engine = engine
|
||||
self.what = what
|
||||
self.cat = cat
|
||||
def run(self):
|
||||
if hasattr(self.engine, 'supported_categories'):
|
||||
if self.cat == 'all' or self.cat in list(self.engine.supported_categories.keys()):
|
||||
self.engine.search(self.what, self.cat)
|
||||
elif self.cat == 'all':
|
||||
self.engine.search(self.what)
|
||||
def run_search(engine_list):
|
||||
""" Run search in engine
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) < 2:
|
||||
raise SystemExit('./nova2.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
|
||||
(','.join(supported_engines)))
|
||||
@param engine_list List with engine, query and category
|
||||
|
||||
if len(sys.argv) == 2:
|
||||
if sys.argv[1] == "--capabilities":
|
||||
displayCapabilities()
|
||||
sys.exit(0)
|
||||
@retval False if any exceptions occurred
|
||||
@retval True otherwise
|
||||
"""
|
||||
engine, what, cat = engine_list
|
||||
try:
|
||||
engine = engine()
|
||||
#avoid exceptions due to invalid category
|
||||
if hasattr(engine, 'supported_categories'):
|
||||
cat = cat if cat in engine.supported_categories else "all"
|
||||
engine.search(what, cat)
|
||||
else:
|
||||
raise SystemExit('./nova.py [all|engine1[,engine2]*] <category> <keywords>\navailable engines: %s'%
|
||||
(','.join(supported_engines)))
|
||||
engine.search(what)
|
||||
|
||||
engines_list = [e.lower() for e in sys.argv[1].strip().split(',')]
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
def main(args):
|
||||
supported_engines = initialize_engines()
|
||||
|
||||
if not args:
|
||||
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
|
||||
"available engines: %s" % (','.join(supported_engines)))
|
||||
|
||||
elif args[0] == "--capabilities":
|
||||
displayCapabilities(supported_engines)
|
||||
return
|
||||
|
||||
elif len(args) < 3:
|
||||
raise SystemExit("./nova2.py [all|engine1[,engine2]*] <category> <keywords>\n"
|
||||
"available engines: %s" % (','.join(supported_engines)))
|
||||
|
||||
#get only unique engines with set
|
||||
engines_list = set(e.lower() for e in args[0].strip().split(','))
|
||||
|
||||
if 'all' in engines_list:
|
||||
engines_list = supported_engines
|
||||
else:
|
||||
#discard un-supported engines
|
||||
engines_list = [engine for engine in engines_list
|
||||
if engine in supported_engines]
|
||||
|
||||
cat = sys.argv[2].lower()
|
||||
if not engines_list:
|
||||
#engine list is empty. Nothing to do here
|
||||
return
|
||||
|
||||
cat = args[1].lower()
|
||||
|
||||
if cat not in CATEGORIES:
|
||||
raise SystemExit('Invalid category!')
|
||||
raise SystemExit(" - ".join(('Invalid category', cat)))
|
||||
|
||||
what = urllib.parse.quote(' '.join(sys.argv[3:]))
|
||||
|
||||
threads = []
|
||||
for engine in engines_list:
|
||||
try:
|
||||
what = urllib.parse.quote(' '.join(args[2:]))
|
||||
if THREADED:
|
||||
exec("l = EngineLauncher(%s(), what, cat)"%engine)
|
||||
threads.append(l)
|
||||
l.start()
|
||||
#number of spawned child processes is capped at min(number of searches, number of CPUs)
|
||||
with Pool(min(len(engines_list), cpu_count())) as pool:
|
||||
pool.map(run_search, ([globals()[engine], what, cat] for engine in engines_list))
|
||||
else:
|
||||
exec("e = %s()"%engine)
|
||||
if hasattr(engine, 'supported_categories'):
|
||||
if cat == 'all' or cat in list(e.supported_categories.keys()):
|
||||
e.search(what, cat)
|
||||
elif self.cat == 'all':
|
||||
e.search(what)
|
||||
engine().search(what, cat)
|
||||
except:
|
||||
pass
|
||||
if THREADED:
|
||||
for t in threads:
|
||||
t.join()
|
||||
#py3 note: map() is lazy, so its result must be consumed for the searches to actually run
|
||||
all(map(run_search, ([globals()[engine], what, cat] for engine in engines_list)))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(argv[1:])
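The plain map() call in the old non-threaded branch is wrapped in all() above because of the behaviour the "#py3 note" comment points at: in Python 3, map() returns a lazy iterator and does no work until it is consumed. A short illustration:

def run(x):
    print("running %s" % x)
    return True

map(run, [1, 2, 3])        # Python 3: builds an iterator, prints nothing
all(map(run, [1, 2, 3]))   # consuming the iterator actually calls run()

Any consumer would do (list(), a for loop); all() is simply a compact way to drain it.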
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#VERSION: 1.10
|
||||
#VERSION: 1.20
|
||||
|
||||
# Author:
|
||||
# Christophe DUMEZ (chris@qbittorrent.org)
|
||||
|
|
|
@ -26,12 +26,10 @@
|
|||
|
||||
|
||||
def prettyPrinter(dictionary):
|
||||
outtext = ''
|
||||
dictionary['size'] = anySizeToBytes(dictionary['size'])
|
||||
outtext = "|".join((dictionary["link"], dictionary["name"].replace("|", " "), str(dictionary["size"]), str(dictionary["seeds"]), str(dictionary["leech"]), dictionary["engine_url"]))
|
||||
if 'desc_link' in dictionary:
|
||||
outtext = '%s|%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'],dictionary['desc_link'])
|
||||
else:
|
||||
outtext = '%s|%s|%s|%s|%s|%s'%(dictionary['link'],dictionary['name'].replace('|',' '),dictionary['size'],dictionary['seeds'],dictionary['leech'],dictionary['engine_url'])
|
||||
outtext = "|".join((outtext, dictionary["desc_link"]))
|
||||
|
||||
# fd 1 is stdout
|
||||
with open(1, 'w', encoding='utf-8', closefd=False) as utf8stdout:
|
||||
|
@ -60,7 +58,7 @@ def anySizeToBytes(size_string):
|
|||
short_unit = unit.upper()[0]
|
||||
|
||||
# convert
|
||||
units_dict = { 'T': 40, 'G': 30, 'M': 20, 'K': 10 }
|
||||
units_dict = {'T': 40, 'G': 30, 'M': 20, 'K': 10}
|
||||
if short_unit in units_dict:
|
||||
size = size * 2**units_dict[short_unit]
|
||||
return int(size)
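The rewritten prettyPrinter opens file descriptor 1 through io.open with closefd=False, so results are always written as UTF-8 regardless of the console's encoding, and the real stdout stays open afterwards. A minimal sketch of that trick using the pipe-separated field order shown above (the sample dictionary values are made up):

from io import open

result = {'link': 'http://example.invalid/get/1', 'name': 'Sample torrent',
          'size': 734003200, 'seeds': 12, 'leech': 3,
          'engine_url': 'http://example.invalid'}

line = "|".join((result['link'], result['name'].replace('|', ' '),
                 str(result['size']), str(result['seeds']),
                 str(result['leech']), result['engine_url']))

# fd 1 is stdout; closefd=False keeps the real stdout usable after the with-block
with open(1, 'w', encoding='utf-8', closefd=False) as utf8_stdout:
    utf8_stdout.write(line + "\n")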
|
||||
|
|