关于文章 whoosh

jieba search whoosh 分词 搜索

博客使用whoosh+jieba作搜索

whoosh作引擎,jieba作分词,实现搜索功能

博客一直没有搜索,本来想用 es,但是想用更硬核一点的方案,所以选用了 whoosh,whoosh 是纯 py 编写的

上代码

单例模式初始化一次whoosh

更新后注销掉对象

唯一难受的是 博客从30m上下的内存占用 飙升到226m!!!!!!!!!!

网上的whoosh 都是贼简单的demo

import os
from jieba.analyse import ChineseAnalyzer
from whoosh.qparser import MultifieldParser
from services import Singleton
from logic.articleDao import articleDao
from whoosh.index import create_in
from whoosh.fields import Schema,ID,TEXT
ana=ChineseAnalyzer()
class Search(metaclass=Singleton):
    """Full-text article search backed by a Whoosh index with jieba analysis.

    Singleton: the index over all articles is built once, at first
    instantiation. After content changes, call ``clear()`` so the next
    access rebuilds the index from scratch.
    """

    def __init__(self):
        # Load the whole corpus up front; every article is (re)indexed here.
        self.list = articleDao.listAllNoPage()
        schema = Schema(
            id=ID(stored=True, analyzer=ana),
            title=TEXT(stored=True, analyzer=ana),
            content=TEXT(stored=True, analyzer=ana),
            keyword=TEXT(stored=True, analyzer=ana),
            desc=TEXT(stored=True, analyzer=ana),
        )
        # makedirs(exist_ok=True) avoids the exists()/mkdir() race of the
        # original two-step check.
        os.makedirs("index", exist_ok=True)
        ix = create_in("index", schema)
        writer = ix.writer()
        try:
            for art in self.list:
                writer.add_document(
                    id=str(art.id),
                    title=art.title,
                    content=art.content,
                    keyword=art.keyword,
                    desc=art.desc,
                )
            writer.commit()
        except Exception:
            # Release the index writer lock on failure, then re-raise;
            # otherwise the next rebuild attempt would block/fail on the lock.
            writer.cancel()
            raise
        self.ix = ix

    def search(self, keyword):
        """Return stored ``id`` strings of articles matching *keyword*.

        Matches against content, title, desc and keyword fields.
        """
        query = MultifieldParser(
            ["content", "title", "desc", "keyword"], schema=self.ix.schema
        ).parse(keyword)
        # limit=None means unlimited results. The original
        # ``limit=len(self.list)`` raised ValueError inside Whoosh when the
        # article list was empty (limit must be >= 1).
        # The with-block guarantees the searcher is closed even if the
        # search raises (the original leaked it on error).
        with self.ix.searcher() as searcher:
            res = searcher.search(query, limit=None)
            return [r.get("id") for r in res]

    @classmethod
    def clear(cls):
        """Drop the cached singleton so the index is rebuilt on next use.

        NOTE(review): assumes the Singleton metaclass keeps its instance
        cache in ``_instances`` and resolves it through the class — confirm
        against ``services.Singleton``.
        """
        cls._instances = {}