鉴于目前的种子库要么不全、要么不新,或者干脆已经崩了,
我给出一个技术性的方法:直接实现 BEP 9(ut_metadata 元数据扩展)协议,向其他节点(peer)请求种子的元数据。
代码来自我的DHT爬虫 LEXUGE/L-Spider
代码有部分拼写错误,请谅解
def _fetch_metadata(the_socket, infohash, timeout):
    """Run the handshakes on a connected socket and download the raw metadata.

    Sends the handshake and the extension handshake, then requests the
    metadata piece by piece and concatenates the payloads.

    :param the_socket: connected TCP socket to the peer.
    :param infohash: raw infohash, passed to the handshake helpers.
    :param timeout: value forwarded to ``recvall`` for every piece.
    :returns: the concatenated metadata string, or ``None`` when
        ``check_handshake`` rejects the peer's handshake reply.
    """
    # Handshake.
    send_handshake(the_socket, infohash)
    packet = the_socket.recv(4096)
    if not check_handshake(packet, infohash):
        return None

    # Extension handshake: the reply carries the peer's ut_metadata id and
    # the total metadata size.
    send_ext_handshake(the_socket)
    packet = the_socket.recv(4096)
    ut_metadata = get_ut_metadata(packet)
    metadata_size = get_metadata_size(packet)

    # The metadata is requested in 16 KiB pieces.
    piece_count = int(math.ceil(metadata_size / (16.0 * 1024)))
    pieces = []
    for piece in range(piece_count):
        request_metadata(the_socket, ut_metadata, piece)
        packet = recvall(the_socket, timeout)
        # Keep everything after the first "ee" (presumably the end of the
        # bencoded message header, see BEP 9 -- confirm against recvall).
        pieces.append(packet[packet.index("ee") + 2:])
    return "".join(pieces)


def _build_info(meta_data, infohash_hex, peer_ip):
    """Build the record handed to ``storage_info`` from decoded metadata.

    :param meta_data: decoded info dictionary (result of ``bdecode``).
    :param infohash_hex: upper-case hex form of the infohash.
    :param peer_ip: IP address of the peer the metadata came from.
    :returns: dict with the keys ``hash_id``, ``hash_name``, ``hash_size``
        (a string), ``files`` and ``a_ip``.
    """
    info = {
        'hash_id': infohash_hex,
        'hash_name': meta_data.get('name', '').strip(),
        'a_ip': peer_ip,
    }
    # 'length' at the top level plus the 'length' of every entry in 'files'.
    total_size = meta_data.get('length', 0)
    if 'files' in meta_data:
        info['files'] = meta_data['files']
        for item in info['files']:
            total_size += item.get('length', 0)
    else:
        info['files'] = ''
    info['hash_size'] = str(total_size)
    return info


def download_metadata(address, infohash, timeout=15):
    """Fetch a torrent's metadata from one peer, verify it and store it.

    NOTE: this is Python 2 code (``str.encode("hex")``, byte-string packets).

    Steps: connect to the peer, download the raw metadata, check that its
    SHA-1 matches ``infohash``, decode it and pass the resulting record to
    ``storage_info`` together with a ``"d4:info" + metadata + "e"`` string.

    Failures (timeouts, socket errors, malformed replies, hash mismatch)
    are swallowed on purpose: the function is best-effort and always
    returns ``None``. The socket is closed on every path.

    :param address: ``(ip, port)`` of the peer; ``address[0]`` is checked
        with ``ip_black_list`` and the call is skipped when that is truthy.
    :param infohash: raw infohash of the wanted torrent.
    :param timeout: socket timeout in seconds, also forwarded to
        ``recvall`` for each metadata piece.
    """
    if ip_black_list(address[0]):
        return

    the_socket = None
    try:
        the_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        the_socket.settimeout(timeout)
        the_socket.connect(address)

        metadata = _fetch_metadata(the_socket, infohash, timeout)
        if metadata is None:
            # Handshake rejected.
            return

        # Verify the metadata against the requested infohash.
        infohash_hex = infohash.encode("hex").upper()
        check_sum = sha1(metadata).hexdigest().upper()
        if check_sum != infohash_hex:
            print("[check infohash] failed.Check_sum:" + check_sum)
            print("[check infohash] infohash:" + infohash_hex)
            print("\r\n\r\n")
            return
        print("[check infohash] successful.Check_sum:" + check_sum)
        print("[check infohash] infohash:" + infohash_hex)

        meta_data = bdecode(metadata)
        # Wrap the info dictionary so the result is a torrent file body.
        torrent_file = "d4:info" + metadata + "e"
        del metadata  # drop the extra copy before storing

        info = _build_info(meta_data, infohash_hex, address[0])
        storage_info(info, torrent_file, address)
        del info
        gc.collect()
    except Exception:
        # Best-effort: covers socket.timeout, socket.error and any parsing
        # error caused by a misbehaving peer.
        return
    finally:
        if the_socket is not None:
            try:
                the_socket.close()
            except socket.error:
                pass