欧美性猛交XXXX免费看蜜桃,成人网18免费韩国,亚洲国产成人精品区综合,欧美日韩一区二区三区高清不卡,亚洲综合一区二区精品久久

打開(kāi)APP
userphoto
未登錄

開(kāi)通VIP,暢享免費電子書(shū)等14項超值服

開(kāi)通VIP
Lucene 全文檢索實踐(四)

對于 Lucene 的初步研究已經過去一段時間,自己感覺還不是很深入,但由于時間的關系,一直也沒再拿起。應網友的要求,將自己實踐中寫的一些代碼貼出來,希望能對大家有用。程序沒有做進一步的優化,只是很簡單的實現功能而已,僅供參考。

在實踐中,我將 PHP 中文手冊中的 HTML 文件生成索引,然后通過一個 JSP 對其進行全文檢索。
生成索引的 Java 代碼:

/**

 * PHPDocIndexer.java

 * 用于對 PHPDoc HTML 頁面生成索引文件。

 */

import java.io.File;

import java.io.FileReader;

import java.io.BufferedReader;

import java.io.IOException;

import java.util.Date;

import java.text.DateFormat;

import java.lang.*;

 

import org.apache.lucene.analysis.cjk.CJKAnalyzer;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.DateField;

 

class PHPDocIndexer

{

    public static void main(String[] args) throws ClassNotFoundException, IOException

    {

        try {

            Date start = new Date();

 

            IndexWriter writer = new IndexWriter("/home/nio/indexes-phpdoc", new CJKAnalyzer(), true); //索引保存目錄,必須存在

            indexDocs(writer, new File("/home/nio/phpdoc-zh")); //HTML 文件保存目錄

           

            System.out.println("Optimizing ....");

            writer.optimize();

            writer.close();

           

            Date end = new Date();

          

            System.out.print("Total time: ");

            System.out.println(end.getTime() - start.getTime());

        } catch (Exception e) {

            System.out.println("Class " + e.getClass() + " throws error!\n  errmsg: " + e.getMessage());

        }   //end try

    }   //end main

   

    public static void indexDocs(IndexWriter writer, File file) throws Exception

    {

        if (file.isDirectory()) {

            String[] files = file.list();

            for (int i = 0; i < files.length; i++) {

                indexDocs(writer, new File(file, files[i]));

            }   //end for

        } else if (file.getPath().endsWith(".html")) {  //只對 HTML 文件做索引          

            System.out.print("Add file:" + file + " ....");

            // Add html file ....

            Document doc = new Document();

            doc.add(Field.UnIndexed("file", file.getName()));    //索引文件名

            doc.add(Field.UnIndexed("modified", DateFormat.getDateTimeInstance().format(new Date(file.lastModified()))));   //索引最后修改時(shí)間

 

            String title = "";

            String content = "";

            String status = "start";

            

            FileReader fReader = new FileReader(file);

            BufferedReader bReader = new BufferedReader(fReader);

            String line = bReader.readLine();

 

            while (line != null) {               

                content += line;

                //截取 HTML 標題 <title>

                if ("start" == status && line.equalsIgnoreCase("><TITLE")) {

                    status = "match";

                } else if ("match" == status) {

                    title = line.substring(1, line.length() - 7);

                    doc.add(Field.Text("title", title));    //索引標題

                    status = "end";

                }   //end if

                line = bReader.readLine();

            }   //end while

            bReader.close();

            fReader.close();

            doc.add(Field.Text("content", content.replaceAll("<[^<>]+>", "")));  //索引內容

            writer.addDocument(doc);

            System.out.println(" [OK]");

        }   //end if

    }

 

}   //end class



索引生成完之后,就需要一個檢索頁面,下邊是搜索頁面(search.jsp)的代碼:

<%@ page language="java" import="javax.servlet.*, javax.servlet.http.*, java.io.*, java.util.Date, java.util.ArrayList, java.util.regex.*, org.apache.lucene.analysis.*, org.apache.lucene.document.*, org.apache.lucene.index.*, org.apache.lucene.search.*, org.apache.lucene.queryParser.*, org.apache.lucene.analysis.Token, org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.cjk.CJKAnalyzer, org.apache.lucene.analysis.cjk.CJKTokenizer, com.chedong.weblucene.search.WebLuceneHighlighter" %>

<%@ page contentType="text/html;charset=GB2312" %>

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"

    "http://www.w3.org/TR/html4/strict.dtd">

<html>

<head>

    <meta http-equiv="Content-Type" content="text/html; charset=gb2312" />

    <title>PHPDoc - PHP 簡體中文手冊全文檢索</title>

    <base target="main"><!-- 由于使用了 Frame,所以指定 target main 窗口顯示 -->

    <style>

        body {background-color: white; margin: 4px}

        body, input, div {font-family: Tahoma; font-size: 9pt}

        body, div {line-height: 18px}

        u {color: red}

        b {color: navy}

        form {padding: 0px; margin: 0px}

        .txt {border: 1px solid black}

        .f {padding: 4px; margin-bottom: 16px; background-color: #E5ECF9; border-top: 1px solid #3366CC; border-bottom: 1px solid #3366CC; text-align: center;}

        .d, .o {padding-left: 16px}

        .d {color: gray}

        .o {color: green}

        .o a {color: #7777CC}

    </style>

    <script language="JavaScript">

        function gotoPage(i)

        {

            document.frm.page.value = i;

            document.frm.submit();

        }   //end function

    </script>

</head>

 

<body>

<%
// Request state shared with the scriptlets further down the page.
String keyVal = null;    // search keywords entered by the user
String pageVal = null;   // raw "page" parameter, parsed later
int offset = 0;          // index of the first hit shown on the current page
int curPage = 0;         // current page number
int pages;               // total number of result pages, computed after the search
final int ROWS = 50;     // hits shown per page

// Read the GET parameters. "page" is only consulted when a keyword is present.
String rawKey = request.getParameter("key");
if (rawKey != null) {
    try {
        // Recover the raw request bytes and decode them with the charset this
        // page declares, instead of whatever the platform default happens to be.
        // Assumes the container decoded the query string as ISO-8859-1, as the
        // original round-trip implies.
        keyVal = new String(rawKey.getBytes("ISO8859_1"), "GB2312");
    } catch (UnsupportedEncodingException e) {
        // Charset not available in this JRE: fall back to the value as received.
        keyVal = rawKey;
    }
    pageVal = request.getParameter("page");
}
if (keyVal == null)
    keyVal = "";
%>

<div class="f">
<%!
    // Escapes text for safe use inside a double-quoted HTML attribute.
    // A null value becomes the empty string.
    private static String escapeAttr(String s)
    {
        if (s == null)
            return "";
        return s.replaceAll("&", "&amp;")
                .replaceAll("\"", "&quot;")
                .replaceAll("'", "&#39;")
                .replaceAll("<", "&lt;")
                .replaceAll(">", "&gt;");
    }
%>
    <%-- Both values come straight from the request, so they are escaped before being echoed. --%>
    <form name="frm" action="./index.jsp" method="GET" onsubmit="this.page.value='0';return true;" target="_self">
        <input type="text" name="key" class="txt" size="40" value="<%=escapeAttr(keyVal)%>" />
        <input type="hidden" name="page" value="<%=(pageVal == null) ? "0" : escapeAttr(pageVal)%>" />
        <input type="submit" value="  " /><br />
        <font color="green">提示:可使用多個關鍵字(使用空格隔開)提高搜索的準確率。</font>
    </form>
    <script language="JavaScript">
        document.frm.key.focus();
    </script>
</div>

<%
// Runs the search and renders one page of hits plus the paging bar.
if (keyVal != null && keyVal.length() > 0) {
    try {
        curPage = Integer.parseInt(pageVal);    // requested page number
    } catch (NumberFormatException e) {
        // Missing or non-numeric "page": keep the default, the clamp below makes it page 1.
    }

    Searcher searcher = null;
    try {
        Date startTime = new Date();

        // Build the query text: lower-case the input, drop stand-alone "or"/"and"
        // words and require every remaining word (joined with AND). Whole words are
        // compared so that letters inside a keyword such as "sort" are left intact.
        String[] words = keyVal.toLowerCase().trim().split("\\s+");
        StringBuffer joined = new StringBuffer();
        for (int w = 0; w < words.length; w++) {
            String word = words[w];
            if (word.length() == 0 || "or".equals(word) || "and".equals(word))
                continue;
            if (joined.length() > 0)
                joined.append(" AND ");
            joined.append(word);
        }
        keyVal = joined.toString();

        searcher = new IndexSearcher("/home/nio/indexes-phpdoc"); // index directory
        Analyzer analyzer = new CJKAnalyzer();
        String[] fields = {"title", "content"};
        Query query = MultiFieldQueryParser.parse(keyVal, fields, analyzer);
        Hits hits = searcher.search(query);
        int total = hits.length();

        // Collect the analysed query terms; they are handed to the highlighter below.
        TokenStream tokenStream = analyzer.tokenStream("", new StringReader(keyVal));
        ArrayList al = new ArrayList();
        for (Token token = tokenStream.next(); token != null; token = tokenStream.next()) {
            al.add(token.termText());
        }

        // Total number of pages, rounded up.
        pages = (total + ROWS - 1) / ROWS;

        // Clamp the current page. The lower bound is applied last so an empty
        // result set (pages == 0) still yields page 1 and a non-negative offset.
        if (curPage > pages)
            curPage = pages;
        if (curPage < 1)
            curPage = 1;

        // First and one-past-last hit index for this page.
        offset = (curPage - 1) * ROWS;
        int end = Math.min(total, offset + ROWS);

        // Render the hits of the current page.
        WebLuceneHighlighter hl = new WebLuceneHighlighter(al);
        for (int i = offset; i < end; i++) {
            Document doc = hits.doc(i);
            // The indexer only adds "title" when it found one; fall back to the file name.
            String docTitle = doc.get("title");
            if (docTitle == null)
                docTitle = doc.get("file");
            %>
            <div class="t"><a href="/~nio/phpdoc-zh/<%=doc.get("file")%>"><%=hl.highLight(docTitle)%></a></div>
            <div class="d"><%=hl.highLight(doc.get("content").replaceAll("\n", "  "), 100)%> ……</div>
            <div class="o">
                /~nio/phpdoc-zh/<%=doc.get("file")%>
                 - 
                <%=doc.get("modified")%>
            </div>
            <br />
            <%
        }
        Date endTime = new Date();
        %>
            <div class="f">
                檢索總共耗時 <b><%=((endTime.getTime() - startTime.getTime()) / 1000.0)%></b> 秒,約有 <b><%=total%></b> 項符合條件的記錄,共 <b><%=pages%></b> 頁
        <%
        if (curPage > 1 && pages > 1) {
        %>
             | <a href="javascript:gotoPage(<%=(curPage-1)%>);" target="_self">上一頁</a>
        <%
        }
        if (curPage < pages && pages > 1) {
        %>
             | <a href="javascript:gotoPage(<%=(curPage+1)%>)" target="_self">下一頁</a>
        <%
        }
        %>
            </div>
        <%
    } catch (Exception e) {
        // The exception message may contain user-supplied query text, so it goes to
        // the server log only; the page just names the exception class.
        application.log("PHPDoc search failed", e);
    %>
        <!-- <%=e.getClass()%> 導致錯誤 -->
    <%
    } finally {
        // Release the index even when the search or the rendering failed.
        if (searcher != null)
            searcher.close();
    }   //end try
}   //end if
%>   

</body>

</html>



在線示例:PHP 手冊(簡體中文)。

本站僅提供存儲服務(wù),所有內容均由用戶(hù)發(fā)布,如發(fā)現有害或侵權內容,請點(diǎn)擊舉報。
打開(kāi)APP,閱讀全文并永久保存 查看更多類(lèi)似文章
猜你喜歡
類(lèi)似文章
Lucene 全文檢索實(shí)踐
Lucene 3.6.1:中文分詞、創(chuàng )建索引庫、排序、多字段分頁(yè)查詢(xún)以及高亮顯示
用Lucene檢索數據庫
lucene多種搜索方式詳解例子
[原創(chuàng )]全文搜索引擎Lucene學(xué)習筆記(頁(yè) 1) - 『 編程設計 』 - 青韶論壇 湘...
Lucene版Hello world(世界,你好)
更多類(lèi)似文章 >>
生活服務(wù)
分享 收藏 導長(cháng)圖 關(guān)注 下載文章
綁定賬號成功
后續可登錄賬號暢享VIP特權!
如果VIP功能使用有故障,
可點(diǎn)擊這里聯(lián)系客服!

聯(lián)系客服

欧美性猛交XXXX免费看蜜桃,成人网18免费韩国,亚洲国产成人精品区综合,欧美日韩一区二区三区高清不卡,亚洲综合一区二区精品久久