Cloud-Drive Search Engine: Source Code Released


Before releasing the source code, let me briefly explain how the common cloud-drive (netdisk) search engines work. The ones currently running in China fall into two categories:

Google Custom Search based, represented by sites such as 西林街 and 盘搜;

Sites with their own crawler and indexing program, represented by 去转盘 and 胖次.

Update, October 2019:

The self-built-crawler approach has largely stopped working, and the quality of the resources you get through Google Custom Search is not great either. The simplest and least troublesome option today is to call a ready-made search API directly; a rough sketch follows the links below.

Recommended platform:


Cloud-drive API:
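
The platform and API links above did not survive in this copy, so purely as an illustration of the "call a hosted API" approach: the endpoint, parameter names, and response fields in the sketch below are hypothetical placeholders, not any real service's interface. Substitute whatever the platform you sign up with actually documents.

# Minimal sketch of calling a hosted netdisk-search API. Endpoint, parameter
# names and response fields are placeholders (assumptions), NOT a real service.
import requests

API_URL = "https://example.com/api/pan/search"  # hypothetical endpoint
API_KEY = "your-api-key"                        # issued by the platform

def search_pan(keyword, page=1):
    resp = requests.get(API_URL, params={
        "kw": keyword,      # search keyword (assumed parameter name)
        "page": page,       # page number (assumed)
        "token": API_KEY,   # auth token (assumed)
    }, timeout=10)
    resp.raise_for_status()
    return resp.json()      # assume a JSON payload of title/link records

if __name__ == "__main__":
    for item in search_pan("python").get("data", []):
        print(item.get("title"), item.get("link"))

Whichever service you pick, the real work is just mapping its JSON response onto your own result page.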

Let's start with the first approach. Technically it is pretty low-end: you pull results from Google's search engine and render them on your own site. Anyone with a little technical background, or even none at all, can set it up, provided you can get over the Great Firewall (Google is blocked in mainland China, as you know). Because it is so simple I won't say much about it; the core source code is as follows:

    <div class="con">
        <div style="text-align:center">
            <div id="cse-search-results"></div>
            <script type="text/javascript">
                var googleSearchIframeName = "cse-search-results";
                var googleSearchFormName = "cse-search-box";
                var googleSearchFrameWidth = 800;
                var googleSearchResizeIframe = true;
                var googleSearchDomain = "www.google.com";
                var googleSearchPath = "/cse";
            </script>
            <script type="text/javascript" src="http://www.google.com/afsonline/show_afs_search.js"></script>
            <br>
        </div>
    </div>

The full source code is attached here (the "click me to download" link from the original post). Once more: get your proxy working before you run it! Six minutes to set up the proxy, four minutes for the code; ten minutes is more than enough for the whole thing.

Now for the second approach, which is considerably harder. Its core consists of a crawler plus an indexer. I have published the crawler code before; part of it is pasted below, and for details see my blog post 《python爬虫爬取百度网盘-怎么做一个百度网盘搜索引擎》 (crawling Baidu Pan with a Python spider: how to build a Baidu Pan search engine).

#coding: utf8
"""
author: haoning
create time: 2015-8-15
"""

import re #regular expressions
import urllib2 #module for opening URLs (requests does the actual fetching below)
import time
from Queue import Queue
import threading, errno, datetime
import json
import requests #Requests is an Apache2 Licensed HTTP library
import MySQLdb as mdb

DB_HOST = '127.0.0.1'
DB_USER = 'root'
DB_PASS = ''

#regular expressions for pulling parameters back out of a request url
re_start = re.compile(r'start=(\d+)') #\d matches a single digit 0-9; the trailing + means one or more digits, e.g. 21312314
re_uid = re.compile(r'query_uk=(\d+)') #query uk (user id)
re_urlid = re.compile(r'&urlid=(\d+)') #url record id

ONEPAGE = 20 #items per page
ONESHAREPAGE = 20 #share links per page

#album lists are not crawled yet
URL_SHARE = 'http://yun.baidu.com/pcloud/feed/getsharelist?auth_type=1&start={start}&limit=20&query_uk={uk}&urlid={id}' #share list
"""
sample record returned by getsharelist:
{"feed_type":"share","category":6,"public":"1","shareid":"1541924625","data_id":"2418757107690953697","title":"\u5723\u8bde\u58c1\u7eb8\u5927\u6d3e\u9001","third":0,"clienttype":0,"filecount":1,"uk":1798788396,"username":"SONYcity03","feed_time":1418986714000,"desc":"","avatar_url":"http:\/\/himg.bdimg.com\/sys\/portrait\/item\/1b6bf333.jpg","dir_cnt":1,"filelist":[{"server_filename":"\u5723\u8bde\u58c1\u7eb8\u5927\u6d3e\u9001","category":6,"isdir":1,"size":1024,"fs_id":870907642649299,"path":"%2F%E5%9C%A3%E8%AF%9E%E5%A3%81%E7%BA%B8%E5%A4%A7%E6%B4%BE%E9%80%81","md5":"0","sign":"1221d7d56438970225926ad552423ff6a5d3dd33","time_stamp":1439542024}],"source_uid":"871590683","source_id":"1541924625","shorturl":"1dDndV6T","vCnt":34296,"dCnt":7527,"tCnt":5056,"like_status":0,"like_count":60,"comment_count":19},
public: whether the share is public
title: file name
uk: user id
"""
URL_FOLLOW = 'http://yun.baidu.com/pcloud/friend/getfollowlist?query_uk={uk}&limit=20&start={start}&urlid={id}' #follow (subscription) list
"""
sample record returned by getfollowlist:
{"type":-1,"follow_uname":"\u597d\u55e8\u597d\u55e8\u554a","avatar_url":"http:\/\/himg.bdimg.com\/sys\/portrait\/item\/979b832f.jpg","intro":"\u9700\u8981\u597d\u8d44\u6599\u52a0994798392","user_type":0,"is_vip":0,"follow_count":2,"fans_count":2276,"follow_time":1415614418,"pubshare_count":36,"follow_uk":2603342172,"album_count":0},
follow_uname: name of the followed user
fans_count: number of fans
"""
URL_FANS = 'http://yun.baidu.com/pcloud/friend/getfanslist?query_uk={uk}&limit=20&start={start}&urlid={id}' #fans list
"""
sample record returned by getfanslist:
{"type":-1,"fans_uname":"\u62e8\u52a8\u795e\u7684\u5fc3\u7eea","avatar_url":"http:\/\/himg.bdimg.com\/sys\/portrait\/item\/d5119a2b.jpg","intro":"","user_type":0,"is_vip":0,"follow_count":8,"fans_count":39,"follow_time":1439541512,"pubshare_count":15,"fans_uk":288332613,"album_count":0}
avatar_url: avatar
fans_uname: user name
"""

QNUM = 1000
hc_q = Queue(20) #request queue
hc_r = Queue(QNUM) #response queue
success = 0
failed = 0

 

def req_worker(inx): #request worker
    s = requests.Session() #one HTTP session per thread
    while True:
        req_item = hc_q.get() #take a request item off the queue
        req_type = req_item[0] #request type: share, follow or fans
        url = req_item[1] #url to fetch
        r = s.get(url) #fetch the data
        hc_r.put((r.text, url)) #put the response text and its url on the receive queue
        print "req_worker#", inx, url #inx: thread index; url: the url just fetched

def response_worker(): #response processing worker
    dbconn = mdb.connect(DB_HOST, DB_USER, DB_PASS, 'baiduyun', charset='utf8')
    dbcurr = dbconn.cursor()
    dbcurr.execute('SET NAMES utf8')
    dbcurr.execute('set global wait_timeout=60000') #database setup
    while True:
        """
        regex notes:
        match()    checks for a match only at the beginning of the string
        search()   scans the string and finds the first position where the RE matches
        findall()  finds all substrings matched by the RE and returns them as a list
        finditer() finds all substrings matched by the RE and returns them as an iterator
        example Baidu page link: http://pan.baidu.com/share/link?shareid=3685432306&uk=1798788396&from=hotrec
        uk is really the user id
        """
        metadata, effective_url = hc_r.get() #metadata is r.text from the request worker; effective_url is the url it came from
        #print "response_worker:", effective_url
        try:
            tnow = int(time.time()) #current time
            id = re_urlid.findall(effective_url)[0] #urlid record id extracted from the url
            start = re_start.findall(effective_url)[0] #paging offset extracted from the url
            if True:
                if 'getfollowlist' in effective_url: #type = 1, follow list
                    follows = json.loads(metadata) #parse the response text as json
                    uid = re_uid.findall(effective_url)[0] #query uk (user id)
                    if "total_count" in follows.keys() and follows["total_count"]>0 and str(start) == "0": #total number of follows
                        for i in range((follows["total_count"]-1)/ONEPAGE): #queue the remaining pages one by one
                            try:
                                dbcurr.execute('INSERT INTO urlids(uk, start, limited, type, status) VALUES(%s, %s, %s, 1, 0)' % (uid, str(ONEPAGE*(i+1)), str(ONEPAGE)))
                                #store a url record: uk is the user id, start is the paging offset, status=0 means not processed yet
                            except Exception as ex:
                                print "E1", str(ex)
                                pass
                    if "follow_list" in follows.keys(): #the user follows other users, i.e. has a follow_list
                        for item in follows["follow_list"]:
                            try:
                                dbcurr.execute('INSERT INTO user(userid, username, files, status, downloaded, lastaccess) VALUES(%s, "%s", 0, 0, 0, %s)' % (item['follow_uk'], item['follow_uname'], str(tnow)))
                                #store the followed user's id, name and the time it entered the database
                            except Exception as ex:
                                print "E13", str(ex)
                                pass
                    else:
                        print "delete 1", uid, start
                        dbcurr.execute('delete from urlids where uk=%s and type=1 and start>%s' % (uid, start))
                elif 'getfanslist' in effective_url: #type = 2, fans list
                    fans = json.loads(metadata)
                    uid = re_uid.findall(effective_url)[0]
                    if "total_count" in fans.keys() and fans["total_count"]>0 and str(start) == "0":
                        for i in range((fans["total_count"]-1)/ONEPAGE):
                            try:
                                dbcurr.execute('INSERT INTO urlids(uk, start, limited, type, status) VALUES(%s, %s, %s, 2, 0)' % (uid, str(ONEPAGE*(i+1)), str(ONEPAGE)))
                            except Exception as ex:
                                print "E2", str(ex)
                                pass
                    if "fans_list" in fans.keys():
                        for item in fans["fans_list"]:
                            try:
                                dbcurr.execute('INSERT INTO user(userid, username, files, status, downloaded, lastaccess) VALUES(%s, "%s", 0, 0, 0, %s)' % (item['fans_uk'], item['fans_uname'], str(tnow)))
                            except Exception as ex:
                                print "E23", str(ex)
                                pass
                    else:
                        print "delete 2", uid, start
                        dbcurr.execute('delete from urlids where uk=%s and type=2 and start>%s' % (uid, start))
                else: #type = 0, share list
                    shares = json.loads(metadata)
                    uid = re_uid.findall(effective_url)[0]
                    if "total_count" in shares.keys() and shares["total_count"]>0 and str(start) == "0":
                        for i in range((shares["total_count"]-1)/ONESHAREPAGE):
                            try:
                                dbcurr.execute('INSERT INTO urlids(uk, start, limited, type, status) VALUES(%s, %s, %s, 0, 0)' % (uid, str(ONESHAREPAGE*(i+1)), str(ONESHAREPAGE)))
                            except Exception as ex:
                                print "E3", str(ex)
                                pass
                    if "records" in shares.keys():
                        for item in shares["records"]:
                            try:
                                dbcurr.execute('INSERT INTO share(userid, filename, shareid, status) VALUES(%s, "%s", %s, 0)' % (uid, item['title'], item['shareid'])) #item['title'] is exactly the file name
                            except Exception as ex:
                                #print "E33", str(ex), item
                                pass
                    else:
                        print "delete 0", uid, start
                        dbcurr.execute('delete from urlids where uk=%s and type=0 and start>%s' % (uid, str(start)))
                dbcurr.execute('delete from urlids where id=%s' % (id, ))
                dbconn.commit()
        except Exception as ex:
            print "E5", str(ex), id
    dbcurr.close()
    dbconn.close() #close the database (never reached because of the infinite loop above)

    

def worker():
    global success, failed
    dbconn = mdb.connect(DB_HOST, DB_USER, DB_PASS, 'baiduyun', charset='utf8')
    dbcurr = dbconn.cursor()
    dbcurr.execute('SET NAMES utf8')
    dbcurr.execute('set global wait_timeout=60000')
    #database setup, same as above
    while True:
        #dbcurr.execute('select * from urlids where status=0 order by type limit 1')
        dbcurr.execute('select * from urlids where status=0 and type>0 limit 1') #type>0 means follow/fans lists, not share lists
        d = dbcurr.fetchall()
        #take one pending record at a time
        #print d
        if d: #a pending record exists
            id = d[0][0] #url record id
            uk = d[0][1] #user id
            start = d[0][2]
            limit = d[0][3]
            type = d[0][4] #which kind of list
            dbcurr.execute('update urlids set status=1 where id=%s' % (str(id),)) #mark as visited (status=1)
            url = ""
            if type == 0: #share list
                url = URL_SHARE.format(uk=uk, start=start, id=id).encode('utf-8') #format the share-list url
                #query_uk: uk, the user id
                #start: paging offset
                #urlid: id of this url record
            elif type == 1: #follow list
                url = URL_FOLLOW.format(uk=uk, start=start, id=id).encode('utf-8') #format the follow-list url
            elif type == 2: #fans list
                url = URL_FANS.format(uk=uk, start=start, id=id).encode('utf-8') #format the fans-list url
            if url:
                hc_q.put((type, url)) #put the url on the request queue; type says which kind of data it is
                #each of these urls returns the corresponding json data: shares, follows or fans
            #print "processed", url
        else: #otherwise expand the crawl tree one level: schedule the users discovered via follow/fans lists
            dbcurr.execute('select * from user where status=0 limit 1000')
            d = dbcurr.fetchall()
            if d:
                for item in d:
                    try:
                        dbcurr.execute('insert into urlids(uk, start, limited, type, status) values("%s", 0, %s, 0, 0)' % (item[1], str(ONESHAREPAGE)))
                        #uk is the user id; start=0 means begin at the first record
                        dbcurr.execute('insert into urlids(uk, start, limited, type, status) values("%s", 0, %s, 1, 0)' % (item[1], str(ONEPAGE)))
                        dbcurr.execute('insert into urlids(uk, start, limited, type, status) values("%s", 0, %s, 2, 0)' % (item[1], str(ONEPAGE)))
                        dbcurr.execute('update user set status=1 where userid=%s' % (item[1],)) #mark the user as already expanded
                        #this schedules all three lists (shares, follows, fans) for the user
                    except Exception as ex:
                        print "E6", str(ex)
            else:
                time.sleep(1)
        dbconn.commit()
    dbcurr.close()
    dbconn.close()

        

def main():
    print 'starting at:', datetime.datetime.now()
    for item in range(16):
        t = threading.Thread(target = req_worker, args = (item,))
        t.setDaemon(True)
        t.start() #start the request threads, 16 in total
    s = threading.Thread(target = worker, args = ())
    s.setDaemon(True)
    s.start() #start the scheduling worker thread
    response_worker() #the main thread runs response_worker
    print 'all Done at:', datetime.datetime.now()

if __name__ == '__main__':
    main()
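
The crawler assumes three MySQL tables (urlids, user, share) in a database called baiduyun, but the original post never shows their schema. The sketch below reconstructs a plausible schema purely from the column names used in the INSERT and SELECT statements above; the exact types, sizes, and constraints are my assumptions, not the author's.

# Plausible DDL for the three tables the crawler writes to. Reconstructed
# from the INSERT/SELECT statements above; column types and sizes are guesses.
import MySQLdb as mdb

DB_HOST, DB_USER, DB_PASS = '127.0.0.1', 'root', ''

SCHEMA = [
    """CREATE TABLE IF NOT EXISTS urlids (
        id      INT AUTO_INCREMENT PRIMARY KEY,
        uk      BIGINT NOT NULL,              -- user id (query_uk)
        start   INT NOT NULL DEFAULT 0,       -- paging offset
        limited INT NOT NULL DEFAULT 20,      -- page size
        type    TINYINT NOT NULL,             -- 0 share, 1 follow, 2 fans
        status  TINYINT NOT NULL DEFAULT 0    -- 0 pending, 1 fetched
    )""",
    """CREATE TABLE IF NOT EXISTS user (
        id         INT AUTO_INCREMENT PRIMARY KEY,
        userid     BIGINT NOT NULL UNIQUE,    -- uk of a discovered user
        username   VARCHAR(255),
        files      INT DEFAULT 0,
        status     TINYINT DEFAULT 0,         -- 0 not expanded yet, 1 expanded
        downloaded INT DEFAULT 0,
        lastaccess INT                        -- unix timestamp
    )""",
    """CREATE TABLE IF NOT EXISTS share (
        id       INT AUTO_INCREMENT PRIMARY KEY,
        userid   BIGINT NOT NULL,
        filename VARCHAR(512),
        shareid  BIGINT,
        status   TINYINT DEFAULT 0
    )""",
]

def create_tables():
    conn = mdb.connect(DB_HOST, DB_USER, DB_PASS, 'baiduyun', charset='utf8')
    cur = conn.cursor()
    for ddl in SCHEMA:
        cur.execute(ddl)
    conn.commit()
    conn.close()

Run create_tables() once against an empty baiduyun database before starting the crawler.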

The code is fairly old, but the underlying approach still holds; please read it carefully and study it properly. If you are not technical, stick with the Google Custom Search route. Once the crawler works, the next step is indexing. For this, 去转盘 currently uses the open-source Lucene engine. There is quite a lot of code, so I will only attach the core part here; for the full version please see my post on 博客园 (cnblogs): 《java+lucene中文分词,来看看百度究竟是怎么找到你想要的(十分重要,楼主幸苦之作)》 (java + lucene Chinese word segmentation: how Baidu actually finds what you want).

package com.tray.indexData;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
  
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
  
import com.tray.bean.SerachResult;
import com.tray.common.tools.DateFormater;
  
public class LuceneSearch {
      
    private static String DISC_URL = "/home/indexData/data";
      
    static {
        String os = System.getProperty("os.name"); 
        if(os.toLowerCase().startsWith("win")){ 
            DISC_URL = "E:\\indexData\\data";
        }
        else{
            DISC_URL ="/home/indexData/data";
        }
    }
          
    //analyzer: IK Chinese word segmentation
    private Analyzer analyzer=new IKAnalyzer();
    private static Directory directory;
    //index writer configuration
    private static IndexWriterConfig iwConfig;
    //the IndexWriter itself
    private static IndexWriter writer; 
    private static File indexFile = null; 
      
    private static Version version = Version.LUCENE_36;
      
    private final int PAPGESIZE=10;
  
    /**
     * Initialize the index directory and writer
     * @Author haoning
     */
    public void init() throws Exception {
          
        try {
            indexFile = new File(DISC_URL);
            if (!indexFile.exists()) {
                indexFile.mkdir();
            }
            directory=FSDirectory.open(indexFile); 
            //configure the IndexWriterConfig 
            iwConfig = new IndexWriterConfig(version,analyzer); 
            iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); 
            //create the index writer 
            writer = new IndexWriter(directory,iwConfig);  
        } catch (Exception e) {
            e.printStackTrace(); //do not silently swallow initialization failures
        }
    }
      
    public void closeWriter(){
        try {
            writer.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
      
    public void commit(){
          
        try {
            writer.commit();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
      
    /**
     * Index a single document
     * @Author haoning
     */
    public void singleIndex(Document doc) throws Exception {
        writer.addDocument(doc);
    }
      
    /**
     * Update a single document (keyed by its url field)
     * @Author haoning
     */
    public void singleUpdate(Document doc) throws Exception {
        Term term = new Term("url", doc.get("url"));
        writer.updateDocument(term,doc);
    }
      
    /**
     * Full reindex: clear the index and add all documents
     * @Author haoning
     */
    public void fullIndex(Document[] documentes) throws Exception {
          
        writer.deleteAll();
        for (Document document : documentes) {
            writer.addDocument(document);
        }
        writer.commit();
    }
      
    /**
     * Delete a document from the index
     * @Author haoning
     */
    public void deleteIndex(Document document)throws Exception{
        Term term = new Term("url", document.get("url"));//the url is the real unique key
        writer.deleteDocuments(term);
        writer.commit();
    }
      
    /**
     * Incremental index update (keyed by url)
     * @Author haoning
     */
    public void updateIndex(Document[] documentes) throws Exception{
        for (Document document : documentes) {
            Term term = new Term("url", document.get("url"));
            writer.updateDocument(term, document);
        }
        writer.commit();
    }
      
    /**
     * Exact term query
     * @Author haoning
     */
    public void simpleSearch(String filedStr,String queryStr,int page, int pageSize) throws Exception{
        File indexDir = new File(DISC_URL); 
        //index directory 
        Directory dir=FSDirectory.open(indexDir); 
        //open a reader on the index directory 
        IndexReader reader = IndexReader.open(dir); 
        //create the searcher 
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector topCollector = TopScoreDocCollector.create(searcher.maxDoc(), false);
          
        Term term = new Term(filedStr, queryStr);
        Query query = new TermQuery(term);
        searcher.search(query, topCollector);
        ScoreDoc[] docs = topCollector.topDocs((page-1)*pageSize, pageSize).scoreDocs;
          
        printScoreDoc(docs, searcher);
    }
      
    /**
     * Search with highlighted snippets
     * @Author haoning
     */
    public Map<String, Object> highLightSearch(String filed,String keyWord,int curpage, int pageSize) throws Exception{
        List<SerachResult> list=new ArrayList<SerachResult>();
        Map<String,Object> map = new HashMap<String,Object>();
        if (curpage <= 0) {
            curpage = 1;
        }
        if (pageSize <= 0 || pageSize>20) {
             pageSize = PAPGESIZE;
        }
        File indexDir = new File(DISC_URL); //index directory  
        Directory dir=FSDirectory.open(indexDir);//open a reader on the index directory   
        IndexReader reader = IndexReader.open(dir);//the reader used to build the searcher   
        IndexSearcher searcher = new IndexSearcher(reader);
          
        int start = (curpage - 1) * pageSize;
          
        Analyzer analyzer = new IKAnalyzer(true);
        QueryParser queryParser = new QueryParser(Version.LUCENE_36, filed, analyzer);
        queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = queryParser.parse(keyWord);
          
        int hm = start + pageSize;
        TopScoreDocCollector res = TopScoreDocCollector.create(hm, false);
        searcher.search(query, res);
          
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
          
        long amount = res.getTotalHits();
        //long pages = (rowCount - 1) / pageSize + 1; //total number of pages
          
        map.put("amount",amount);//total number of hits
          
        TopDocs tds = res.topDocs(start, pageSize);
        ScoreDoc[] sd = tds.scoreDocs;
          
        for (int i = 0; i < sd.length; i++) {
            Document doc = searcher.doc(sd[i].doc);
            String temp=doc.get("name");
            //apply highlighting to the matched terms
            TokenStream ts = analyzer.tokenStream("name", new StringReader(temp));
              
            SerachResult record=new SerachResult();
            String name = highlighter.getBestFragment(ts,temp);
            String skydirverName=doc.get("skydirverName");
            String username=doc.get("username");
            String shareTime=doc.get("shareTime");
            String describ=doc.get("describ");
            String typeId=doc.get("typeId");
            String id=doc.get("id");
            String url=doc.get("url");
              
            record.setName(name);
            record.setSkydriverName(skydirverName);
            record.setUsername(username);
            record.setShareTime(DateFormater.getFormatDate(shareTime,"yyyy-MM-dd HH:mm:ss"));
            record.setDescrib(describ);
            record.setTypeId(Integer.parseInt(typeId));
            record.setId(new BigInteger(id));
            record.setUrl(url);
            list.add(record);
              
            /*System.out.println("name:"+name);
            System.out.println("skydirverName:"+skydirverName);
            System.out.println("username:"+username);
            System.out.println("shareTime:"+shareTime);
            System.out.println("describ:"+describ);
            System.out.println("typeId:"+typeId);
            System.out.println("id:"+id);
            System.out.println("url:"+url);*/
        }
        map.put("source",list);
        return map;
    }
      
    /**
     * Prefix query
     * @Author haoning
     */
    public void prefixSearch(String filedStr,String queryStr) throws Exception{
        File indexDir = new File(DISC_URL); 
        //index directory 
        Directory dir=FSDirectory.open(indexDir); 
        //open a reader on the index directory 
        IndexReader reader = IndexReader.open(dir); 
        //create the searcher 
        IndexSearcher searcher = new IndexSearcher(reader);
          
        Term term = new Term(filedStr, queryStr);
        Query query = new PrefixQuery(term);
          
        ScoreDoc[] docs = searcher.search(query, 3).scoreDocs;
        printScoreDoc(docs, searcher);
    }
      
    /**
     * Wildcard query
     * @Author haoning
     */
    public void wildcardSearch(String filedStr,String queryStr) throws Exception{
        File indexDir = new File(DISC_URL); 
        //index directory 
        Directory dir=FSDirectory.open(indexDir); 
        //open a reader on the index directory 
        IndexReader reader = IndexReader.open(dir); 
        //create the searcher 
        IndexSearcher searcher = new IndexSearcher(reader);
          
        Term term = new Term(filedStr, queryStr);
        Query query = new WildcardQuery(term);
        ScoreDoc[] docs = searcher.search(query, 3).scoreDocs;
        printScoreDoc(docs, searcher);
    }
      
    /**
     * Analyzed (word-segmented) query
     * @Author haoning
     */
    public void analyzerSearch(String filedStr,String queryStr) throws Exception{
        File indexDir = new File(DISC_URL); 
        //index directory 
        Directory dir=FSDirectory.open(indexDir); 
        //open a reader on the index directory 
        IndexReader reader = IndexReader.open(dir); 
        //create the searcher 
        IndexSearcher searcher = new IndexSearcher(reader);
          
        QueryParser queryParser = new QueryParser(version, filedStr, analyzer);
        Query query = queryParser.parse(queryStr);
          
        ScoreDoc[] docs = searcher.search(query, 3).scoreDocs;
        printScoreDoc(docs, searcher);
    }
      
    /**
     * Multi-field analyzed query
     * @Author haoning
     */
    public void multiAnalyzerSearch(String[] filedStr,String queryStr) throws Exception{
        File indexDir = new File(DISC_URL); 
        //index directory 
        Directory dir=FSDirectory.open(indexDir); 
        //open a reader on the index directory 
        IndexReader reader = IndexReader.open(dir); 
        //create the searcher 
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser queryParser = new MultiFieldQueryParser(version, filedStr, analyzer);
        Query query = queryParser.parse(queryStr);
          
        ScoreDoc[] docs = searcher.search(query, 3).scoreDocs;
        printScoreDoc(docs, searcher);
    }
      
    public void printScoreDoc(ScoreDoc[] docs,IndexSearcher searcher)throws Exception{
        for (int i = 0; i < docs.length; i++) {
            List<Fieldable> list = searcher.doc(docs[i].doc).getFields();
            for (Fieldable fieldable : list) {
                String fieldName = fieldable.name();
                String fieldValue = fieldable.stringValue();
                System.out.println(fieldName+" : "+fieldValue);
            }
        }
    }
}
  

That is all the code I will attach, since there is already a lot of it; please take some time to study it. What remains is just ordinary web development (PHP for some, JSP or ASP for others), which is beyond the scope of this post. I think I have explained the core ideas clearly enough. If you liked this article, please give it a like, and fellow tinkering programmers are welcome to share it with friends and classmates to study together. Thanks for reading!

Edited 2019-10-08 13:02