[{"data":1,"prerenderedAt":1448},["ShallowReactive",2],{"blog-list-zh-tw":3},[4],{"id":5,"title":6,"body":7,"config":1432,"date":1433,"description":1434,"draft":1435,"extension":1436,"image":1432,"meta":1437,"navigation":1438,"path":1439,"seo":1440,"stem":1441,"tags":1442,"toolbar":1432,"translationKey":1446,"updated":1433,"__hash__":1447},"blog/zh-tw/blog/zero-hallucination-qa.md","我是如何實現閱讀器「零幻覺」問答的",{"type":8,"value":9,"toc":1396},"minimark",[10,18,33,36,41,48,53,58,76,81,94,99,133,136,140,158,165,169,184,189,226,233,237,250,275,280,397,415,422,424,428,435,450,457,477,483,485,489,492,498,500,504,527,537,598,601,612,622,629,631,635,642,648,655,659,666,674,681,685,695,738,749,755,757,761,775,783,790,793,835,845,853,859,866,870,879,885,896,898,902,908,912,919,923,946,956,958,962,968,1026,1032,1034,1038,1045,1068,1072,1092,1103,1105,1109,1120,1123,1146,1157,1163,1165,1169,1188,1199,1201,1205,1227,1238,1240,1244,1263,1269,1271,1275,1362,1369,1380],[11,12,13],"p",{},[14,15],"img",{"alt":16,"src":17},"封面：零幻覺問答","https://cdn.linghuxiong.com/resources/snapshots/ai-chat-cover.png",[19,20,21],"blockquote",{},[11,22,23,24,28,29,32],{},"本文分享 AI 閱讀器 ",[25,26,27],"strong",{},"零幻覺問答"," 的工程實作：回答嚴格基於當前書籍原文，關鍵論述可 ",[25,30,31],{},"一鍵溯源"," 到具體段落。若你也在做 AI 閱讀、文件 QA 或 RAG 類應用，希望三次迭代的經驗與最終架構能有所參考。",[34,35],"hr",{},[37,38,40],"h2",{"id":39},"一實踐歷程三個階段的演進","一、實踐歷程：三個階段的演進",[11,42,43,44,47],{},"零幻覺問答並非一開始就設計完備，而是在 ",[25,45,46],{},"成本、延遲和準確率"," 
的拉扯中逐步演進。以下依時間順序回顧三個階段，便於理解當前架構為何長成這樣。",[49,50],"mermaid",{":config":51,"code":52},"config","flowchart%20LR%0A%20%20%20%20P1%5B%E9%9A%8E%E6%AE%B5%E4%B8%80%EF%BC%9A%E5%85%A8%E6%96%87%E7%9B%B4%E5%A1%9E%5D%20--%3E%20P2%5B%E9%9A%8E%E6%AE%B5%E4%BA%8C%EF%BC%9ALLM%20%E6%8F%90%E5%8F%96%E9%97%9C%E9%8D%B5%E5%8F%A5%5D%0A%20%20%20%20P2%20--%3E%20P3%5B%E9%9A%8E%E6%AE%B5%E4%B8%89%EF%BC%9A%E7%89%87%E6%AE%B5%E7%B4%A2%E5%BC%95%20%2B%20Tool%20%E6%AA%A2%E7%B4%A2%5D%0A%20%20%20%20P1%20-.-%3E%7C%E6%85%A2%E3%80%81%E8%B2%B4%E3%80%81%E9%95%B7%E6%9B%B8%E4%B8%8D%E6%BA%96%7C%20X1%5B%E6%B7%98%E6%B1%B0%5D%0A%20%20%20%20P2%20-.-%3E%7C%E4%B8%9F%E7%B4%B0%E7%AF%80%E3%80%81%E4%BB%8D%E5%81%8F%E6%85%A2%7C%20X2%5B%E6%B7%98%E6%B1%B0%5D%0A%20%20%20%20P3%20--%3E%7C%E7%95%B6%E5%89%8D%E6%96%B9%E6%A1%88%7C%20OK%5B%E9%9B%B6%E5%B9%BB%E8%A6%BA%20%2B%20%E5%8F%AF%E6%BA%AF%E6%BA%90%5D",[54,55,57],"h3",{"id":56},"階段一全文直塞-context最簡單也最先暴露問題","階段一：全文直塞 Context（最簡單，也最先暴露問題）",[11,59,60,63,64,67,68,71,72,75],{},[25,61,62],{},"做法："," 使用者開啟一本書提問時，將提取出的 ",[25,65,66],{},"全部正文"," 放進 System Prompt 或 User 訊息，交給對話模型作答。若全書超過約 ",[25,69,70],{},"40 萬字元","，則 ",[25,73,74],{},"硬截斷","——只保留前面一段，後續章節對模型不可見。",[11,77,78],{},[25,79,80],{},"優點：",[82,83,84,88,91],"ul",{},[85,86,87],"li",{},"實作成本極低，幾乎不需要預處理；",[85,89,90],{},"短書、結構簡單的文件效果尚可——模型確實「看到了整本書」；",[85,92,93],{},"互動簡單：問就能答，沒有「請先等待分析」的等待狀態。",[11,95,96],{},[25,97,98],{},"缺點（很快變得不可接受）：",[82,100,101,107,113,123],{},[85,102,103,106],{},[25,104,105],{},"回應慢","：每次提問都要把海量文字送進模型，首 Token 延遲和總耗時隨書長線性惡化；",[85,108,109,112],{},[25,110,111],{},"Token 成本高","：同一本書每問一次就重複付一遍全文的輸入費用；",[85,114,115,118,119,122],{},[25,116,117],{},"長書嚴重失真","：超過 40 萬字元後被截斷，後半本、附錄、結論章節等於不存在，且 UI 往往 ",[25,120,121],{},"沒有明確告知"," 已截斷；",[85,124,125,128,129,132],{},[25,126,127],{},"檢索粒度為零","：模型要在幾十萬字裡「大海撈針」，容易漏細節，也更容易產生 ",[25,130,131],{},"看似合理、實則無據"," 的概括——閱讀場景最忌諱這類幻覺。",[11,134,135],{},"階段一適合驗證 MVP，不適合作為產品級方案。",[54,137,139],{"id":138},"階段二用輕量-llm-提取關鍵句壓縮-context但壓得太狠","階段二：用輕量 LLM 提取關鍵句（壓縮 
Context，但壓得太狠）",[11,141,142,144,145,148,149,152,153,157],{},[25,143,62],{}," 在提問前（或首次開啟書時），用 ",[25,146,147],{},"成本更低的模型"," 對正文做一輪預處理：依 Spine 分章（或整書分段），抽取 ",[25,150,151],{},"關鍵句","，輸出時保留 ",[154,155,156],"code",{},"[f檔案-起始-結束]"," 形式的位置標記，再將摘錄拼成較短文字，作為後續問答的 Context。",[11,159,160,161,164],{},"典型鏈路是 ",[25,162,163],{},"Extract → Cache → Chat","：先離線或按需跑一遍提取並落庫，之後每次提問複用同一份「關鍵句合集」。這與許多文件 QA 原型裡「先壓縮文件、再拿壓縮結果做 QA」的思路相同，也是我們在階段二實際採用過的路線。",[11,166,167],{},[25,168,80],{},[82,170,171,178,181],{},[85,172,173,174,177],{},"每次提問送入模型的文字 ",[25,175,176],{},"明顯縮短","，單次 Token 消耗較階段一顯著下降；",[85,179,180],{},"預處理結果可快取，同一本書不必每次提問都重新提取；",[85,182,183],{},"已引入位置標記，為後續溯源打下基礎。",[11,185,186],{},[25,187,188],{},"缺點（長書場景下依然扛不住）：",[82,190,191,197,207,216],{},[85,192,193,196],{},[25,194,195],{},"細節大量丟失","：「關鍵句」由模型主觀篩選，論證鏈上的限定條件、反例等容易被丟掉，答案容易「正確但片面」；",[85,198,199,202,203,206],{},[25,200,201],{},"長書 Context 仍然偏大","：大部頭作品即便只留關鍵句，拼接後的輸入依然可觀，",[25,204,205],{},"延遲和成本只是緩解，沒有根治","；",[85,208,209,212,213,206],{},[25,210,211],{},"雙重 LLM 誤差","：提取階段可能漏選，問答階段又可能誤讀摘錄，錯誤會 ",[25,214,215],{},"疊加",[85,217,218,221,222,225],{},[25,219,220],{},"靜態 Context","：無論使用者問的是某一章細節還是全書結構，送進模型的都是 ",[25,223,224],{},"同一份預提取文字","，無法依問題動態收窄範圍。",[11,227,228,229,232],{},"這一階段的教訓很明確：",[25,230,231],{},"問題不在「有沒有壓縮」，而在「壓縮是否按需、以及能否回到原文」","。",[54,234,236],{"id":235},"階段三片段索引-tool-按需檢索-原文回傳當前方案","階段三：片段索引 + Tool 按需檢索 + 原文回傳（當前方案）",[11,238,239,241,242,249],{},[25,240,62],{}," 基本思路參考了 ",[243,244,248],"a",{"href":245,"rel":246},"https://github.com/VectifyAI/PageIndex",[247],"nofollow","PageIndex","，相對階段二，核心變化有三點：",[251,252,253,259,269],"ol",{},[85,254,255,258],{},[25,256,257],{},"預處理產物是結構化索引","（目錄級摘要 + 精確字元 span），而不是把摘錄直接當作問答 Context；",[85,260,261,264,265,268],{},[25,262,263],{},"每次提問由模型透過 Tool Calling 按需檢索","，再 ",[25,266,267],{},"拉取帶位置標記的原文"," 作答；",[85,270,271,274],{},[25,272,273],{},"System Prompt 
與前端聯動","，約束引用格式，並支援點擊角標跳轉、高亮原文。",[11,276,277],{},[25,278,279],{},"三階段對比：",[281,282,283,302],"table",{},[284,285,286],"thead",{},[287,288,289,293,296,299],"tr",{},[290,291,292],"th",{},"維度",[290,294,295],{},"階段一（全文直塞）",[290,297,298],{},"階段二（關鍵句提取）",[290,300,301],{},"階段三（當前）",[303,304,305,324,338,352,366,383],"tbody",{},[287,306,307,311,314,317],{},[308,309,310],"td",{},"單次提問 Context",[308,312,313],{},"全書（或截斷後的前半本）",[308,315,316],{},"預提取關鍵句合集",[308,318,319,320,323],{},"僅與問題相關的少量 ",[25,321,322],{},"原文"," 片段",[287,325,326,329,332,335],{},[308,327,328],{},"長書準確性",[308,330,331],{},"超 40 萬字元後嚴重下降",[308,333,334],{},"依賴提取品質，易丟細節",[308,336,337],{},"依目錄/span 檢索，不受全書長度硬截斷",[287,339,340,343,346,349],{},[308,341,342],{},"回應速度",[308,344,345],{},"慢",[308,347,348],{},"略好，長書仍慢",[308,350,351],{},"檢索 + 短 Context，明顯更快",[287,353,354,357,360,363],{},[308,355,356],{},"Token 成本",[308,358,359],{},"極高",[308,361,362],{},"中等偏高",[308,364,365],{},"預處理攤銷 + 按需付費",[287,367,368,371,374,377],{},[308,369,370],{},"溯源能力",[308,372,373],{},"弱（難標註出處）",[308,375,376],{},"有位置標記，但內容已是二次篩選",[308,378,379,380],{},"角標對應 ",[25,381,382],{},"真實原文 span",[287,384,385,388,391,394],{},[308,386,387],{},"工程複雜度",[308,389,390],{},"低",[308,392,393],{},"中",[308,395,396],{},"高",[11,398,399,402,403,406,407,410,411,414],{},[25,400,401],{},"為何停在階段三："," 閱讀場景的零幻覺，關鍵不是「讓模型看過盡量多的字」，而是 ",[25,404,405],{},"「作答前必須拿到與問題相關的原文證據」","。階段一、二都在 Context ",[25,408,409],{},"體積"," 上做文章；階段三把鏈路拆成 ",[25,412,413],{},"「索引（預處理）→ 檢索（Tool）→ 取證（原文）→ 作答（約束生成）」","，才同時兼顧準確率、成本與可溯源性。",[11,416,417,418,421],{},"下文展開 ",[25,419,420],{},"階段三"," 的實作細節。",[34,423],{},[37,425,427],{"id":426},"二問題定義閱讀場景下幻覺比普通-chat-更致命","二、問題定義：閱讀場景下，幻覺比普通 Chat 更致命",[11,429,430,431,434],{},"普通 ChatBot 偶發錯誤，使用者往往可以容忍。但在 ",[25,432,433],{},"書籍 QA"," 裡，幻覺的代價更高：",[82,436,437,444,447],{},[85,438,439,440,443],{},"使用者問的是 ",[25,441,442],{},"這本書"," 說了什麼，不是問模型的 parametric 
memory；",[85,445,446],{},"一句似是而非的「書中觀點」，可能誤導筆記、引用甚至二次傳播；",[85,448,449],{},"沒有出處，使用者無法核實，產品信任很難建立。",[11,451,452,453,456],{},"因此，「零幻覺」在工程上落地為三條 ",[25,454,455],{},"可執行"," 的規則：",[251,458,459,465,471],{},[85,460,461,464],{},[25,462,463],{},"書內問題必須先查書","：凡可能與當前書籍相關的問題，模型必須先走檢索（Tool），再組織答案；",[85,466,467,470],{},[25,468,469],{},"答案必須可溯源","：關鍵結論附帶原文位置標記，前端可解析並跳轉高亮；",[85,472,473,476],{},[25,474,475],{},"查不到就說查不到","：書中沒有的內容應明確告知，而不是用通用知識冒充「書中觀點」。",[11,478,479,480,482],{},"下文依 ",[25,481,420],{}," 的資料流，說明上述規則如何落地。",[34,484],{},[37,486,488],{"id":487},"三整體架構預處理-工具檢索-約束生成-可點擊溯源","三、整體架構：預處理 → 工具檢索 → 約束生成 → 可點擊溯源",[49,490],{":config":51,"code":491},"flowchart%20TB%0A%20%20%20%20subgraph%20prep%20%5B%E9%9B%A2%E7%B7%9A%2F%E9%A6%96%E6%AC%A1%E9%A0%90%E8%99%95%E7%90%86%5D%0A%20%20%20%20%20%20%20%20A%5B%E4%BE%9D%E7%9B%AE%E9%8C%84%E6%88%96%E9%95%B7%E5%BA%A6%E5%88%87%E5%88%86%E5%85%A8%E6%9B%B8%5D%20--%3E%20B%5BLLM%20%E7%94%A2%E7%94%9F%E7%89%87%E6%AE%B5%E6%91%98%E8%A6%81%5D%0A%20%20%20%20%20%20%20%20B%20--%3E%20C%5B%E6%9C%AC%E6%A9%9F%E6%8C%81%E4%B9%85%E5%8C%96%20Segment%20%E5%BF%AB%E5%8F%96%5D%0A%20%20%20%20end%0A%0A%20%20%20%20subgraph%20ask%20%5B%E4%BD%BF%E7%94%A8%E8%80%85%E6%8F%90%E5%95%8F%5D%0A%20%20%20%20%20%20%20%20D%5B%E4%BD%BF%E7%94%A8%E8%80%85%E8%BC%B8%E5%85%A5%E5%95%8F%E9%A1%8C%5D%20--%3E%20E%7B%E5%B7%B2%E6%9C%89%20Segment%20%E5%BF%AB%E5%8F%96%3F%7D%0A%20%20%20%20%20%20%20%20E%20--%3E%7C%E5%90%A6%7C%20F%5B%E6%8F%90%E5%8F%96%E5%85%A8%E6%96%87%20%2F%20%E8%A9%A2%E5%95%8F%E6%98%AF%E5%90%A6%E9%A0%90%E8%99%95%E7%90%86%5D%0A%20%20%20%20%20%20%20%20F%20--%3E%20prep%0A%20%20%20%20%20%20%20%20E%20--%3E%7C%E6%98%AF%7C%20G%5B%E8%A8%BB%E5%86%8A%20Tool%20Calling%5D%0A%20%20%20%20end%0A%0A%20%20%20%20subgraph%20retrieve%20%5B%E5%B7%A5%E5%85%B7%E6%AA%A2%E7%B4%A2%5D%0A%20%20%20%20%20%20%20%20G%20--%3E%20H%7B%E5%95%8F%E9%A1%8C%E9%A1%9E%E5%9E%8B%7D%0A%20%20%20%20%20%20%20%20H%20--%3E%7C%E5%85%A8%E6%9B%B8%E6%A6%82%E8%A6%BD%2F%E6%9B%B8%E8%A9%95%7C%20I%5Bget_full_book_segment_summaries%5D%0A%20%20%20%2
0%20%20%20%20H%20--%3E%7C%E5%85%B7%E9%AB%94%E4%BA%8B%E5%AF%A6%2F%E4%BA%BA%E7%89%A9%2F%E7%AB%A0%E7%AF%80%7C%20J%5Bget_related_segment_summaries%5D%0A%20%20%20%20%20%20%20%20J%20--%3E%20K%5BLLM%20%E5%BE%9E%E6%91%98%E8%A6%81%E7%9B%AE%E9%8C%84%E4%B8%AD%E9%81%B8%E7%9B%B8%E9%97%9C%E7%89%87%E6%AE%B5%20ID%5D%0A%20%20%20%20%20%20%20%20K%20--%3E%20L%5B%E4%BE%9D%20span%20%E6%8B%89%E5%8F%96%E5%8E%9F%E6%96%87%20%2B%20%E4%BD%8D%E7%BD%AE%E6%A8%99%E8%A8%98%5D%0A%20%20%20%20%20%20%20%20I%20--%3E%20M%5B%E6%8B%BC%E6%8E%A5%E5%85%A8%E6%9B%B8%E7%89%87%E6%AE%B5%E6%91%98%E8%A6%81%5D%0A%20%20%20%20end%0A%0A%20%20%20%20subgraph%20answer%20%5B%E7%94%9F%E6%88%90%E8%88%87%E5%B1%95%E7%A4%BA%5D%0A%20%20%20%20%20%20%20%20L%20--%3E%20N%5BTool%20%E7%B5%90%E6%9E%9C%E5%9B%9E%E5%82%B3%E6%A8%A1%E5%9E%8B%5D%0A%20%20%20%20%20%20%20%20M%20--%3E%20N%0A%20%20%20%20%20%20%20%20N%20--%3E%20O%5BSystem%20Prompt%20%E7%B4%84%E6%9D%9F%E5%BC%95%E7%94%A8%E6%A0%BC%E5%BC%8F%5D%0A%20%20%20%20%20%20%20%20O%20--%3E%20P%5B%E4%B8%B2%E6%B5%81%E8%BC%B8%E5%87%BA%E7%AD%94%E6%A1%88%20%2B%20%E4%BD%8D%E7%BD%AE%E8%A7%92%E6%A8%99%5D%0A%20%20%20%20%20%20%20%20P%20--%3E%20Q%5B%E6%B8%B2%E6%9F%93%E5%8F%AF%E9%BB%9E%E6%93%8A%E5%BC%95%E7%94%A8%E8%A7%92%E6%A8%99%5D%0A%20%20%20%20%20%20%20%20Q%20--%3E%20R%5B%E9%BB%9E%E6%93%8A%20%E2%86%92%20%E9%A0%90%E8%A6%BD%E5%8E%9F%E6%96%87%20%E2%86%92%20%E8%B7%B3%E8%BD%89%E9%AB%98%E4%BA%AE%5D%0A%20%20%20%20end",[11,493,494,495,232],{},"核心思路可概括為：",[25,496,497],{},"不讓模型「憑記憶答題」，而是讓它「先取證、再作答、並標註出處」",[34,499],{},[37,501,503],{"id":502},"四預處理把整本書變成可檢索的片段索引","四、預處理：把整本書變成可檢索的「片段索引」",[11,505,506,507,510,511,514,515,518,519,522,523,526],{},"若每次提問仍採用 ",[25,508,509],{},"階段一"," 的全文 Context，長書必然爆 Token，檢索粒度也過粗。階段三的解法是：使用者首次對某本書發起 AI 對話時，背景非同步跑 ",[25,512,513],{},"片段摘要任務","，依 ",[25,516,517],{},"目錄結構"," 或 ",[25,520,521],{},"文字長度"," 將全書切成若干 ",[154,524,525],{},"Segment","，為每個片段產生摘要，並持久化到本機 IndexedDB。",[11,528,529,530,532,533,536],{},"每個 ",[154,531,525],{}," 在資料結構上包含摘要與 
",[25,534,535],{},"正文物理位置","：",[281,538,539,549],{},[284,540,541],{},[287,542,543,546],{},[290,544,545],{},"欄位",[290,547,548],{},"含義",[303,550,551,565,578,588],{},[287,552,553,562],{},[308,554,555,558,559],{},[154,556,557],{},"startFileIndex"," / ",[154,560,561],{},"endFileIndex",[308,563,564],{},"Spine 檔案索引（PDF 則每頁一個檔案）",[287,566,567,575],{},[308,568,569,558,572],{},[154,570,571],{},"startOffset",[154,573,574],{},"endOffset",[308,576,577],{},"字元級起迄偏移",[287,579,580,585],{},[308,581,582],{},[154,583,584],{},"sequence",[308,586,587],{},"線性閱讀順序",[287,589,590,595],{},[308,591,592],{},[154,593,594],{},"title",[308,596,597],{},"對應目錄標題",[11,599,600],{},"切分策略兼顧精度與成本：單一目錄正文不超過約 20KB 時只摘要該節點；同級目錄會合併成批（15KB～20KB）再呼叫 LLM；無目錄的大塊正文則依 3～4 萬字元區間切段。",[11,602,603,604,607,608,611],{},"摘要生成時的 System Prompt 會要求 ",[25,605,606],{},"保留原文位置標記","（格式 ",[154,609,610],{},"[f數字-數字-數字]","），以便後續 Tool 回傳原文時，位置資訊與 spine 字元偏移一致。核心約束如下：",[613,614,620],"pre",{"className":615,"code":617,"language":618,"meta":619},[616],"language-text","若摘要內容與原文某段相關，須保留段末位置資訊，格式 [f數字-數字-數字]（如 [f1-90-109]）。\n位置標記是整體，禁止修改、合併或省略其中的任何字元或數值。\n","text","",[154,621,617],{"__ignoreMap":619},[11,623,624,625,628],{},"預處理完成後，問答不再依賴「整書 Context」，而是依賴 ",[25,626,627],{},"結構化片段索引","——這是長書場景下零幻覺的工程前提。",[34,630],{},[37,632,634],{"id":633},"五位置標記體系把出處編碼進文字","五、位置標記體系：把「出處」編碼進文字",[11,636,637,638,641],{},"零幻覺不僅要求內容來自原文，還要求 ",[25,639,640],{},"出處可機器解析、可在 UI 中跳轉","。我們採用內嵌位置標記：",[613,643,646],{"className":644,"code":645,"language":618},[616],"[f{fileIndex}-{startChar}-{endChar}]\n",[154,647,645],{"__ignoreMap":619},[11,649,650,651,654],{},"例如 ",[154,652,653],{},"[f5-123-165]"," 表示：第 5 個 Spine 檔案（從 0 起算）中，字元偏移 123～165 的文字區間。",[54,656,658],{"id":657},"_51-標記如何寫入正文","5.1 標記如何寫入正文",[11,660,661,662,665],{},"正文提取層在輸出片段時，為每個小段在段末寫入 ",[154,663,664],{},"[f{fileIndex}-{start}-{end}]","。示意：",[613,667,672],{"className":668,"code":670,"language":671,"meta":619},[669],"language-typescript","const position = `[f${fileIndex}-${absOffset}-${absOffset + 
segment.length}]`;\nfileLines.push(segment.text.trim() + position);\n","typescript",[154,673,670],{"__ignoreMap":619},[11,675,676,677,680],{},"無論是預處理摘要還是 Tool 回傳的原文摘錄，位置資訊都與 ",[25,678,679],{},"Spine 字元偏移"," 對齊，而不是讓模型「估算頁碼」。",[54,682,684],{"id":683},"_52-對模型輸出的約束","5.2 對模型輸出的約束",[11,686,687,688,694],{},"組裝 System Prompt 時，我們單獨約定了 ",[25,689,690],{},[691,692,693],"span",{},"Position Citation Rules","，核心五條：",[251,696,697,707,717,723,732],{},[85,698,699,702,703,706],{},[25,700,701],{},"標準格式","：必須使用 ",[154,704,705],{},"[f_fileIndex-startChar-endChar]","，三段數字缺一不可；",[85,708,709,712,713,716],{},[25,710,711],{},"只引用當前來源","：角標須 ",[25,714,715],{},"原樣複製"," 自本輪 System/User 訊息或 Tool 回傳文字中的標記；",[85,718,719,722],{},[25,720,721],{},"禁止偽造","：不得自行計算、修改或編造位置；",[85,724,725,728,729,206],{},[25,726,727],{},"寧缺毋濫","：當前上下文沒有合法標記時，正常作答即可，",[25,730,731],{},"不要輸出任何位置標記",[85,733,734,737],{},[25,735,736],{},"緊跟論述","：標記須緊跟相關句段，禁止在文末堆砌引用清單。",[11,739,740,741,744,745,748],{},"前端展示前還會過濾模型偶發輸出的 ",[25,742,743],{},"兩段位"," 非法標記（如 ",[154,746,747],{},"[f1-293]","），避免無效角標進入 UI。",[11,750,751],{},[14,752],{"alt":753,"src":754},"引用溯源彈窗","https://cdn.linghuxiong.com/resources/snapshots/ai-chat.png",[34,756],{},[37,758,760],{"id":759},"六tool-calling先檢索再回答","六、Tool Calling：先檢索，再回答",[11,762,763,764,767,768,771,772,232],{},"當對話綁定某本書（存在 ",[154,765,766],{},"resourceId","，且 ",[154,769,770],{},"chatType === 'chat'","）時，每次生成前會向模型註冊兩個 Tool，並掛載對應的 executor。整體遵循 OpenAI 相容的 ",[25,773,774],{},"function calling 迴圈",[54,776,778,779,782],{"id":777},"_61-get_related_segment_summaries-針對具體問題查片段","6.1 ",[154,780,781],{},"get_related_segment_summaries"," —— 針對具體問題查片段",[11,784,785,786,789],{},"適用於：概念、人物、情節、章節細節等 ",[25,787,788],{},"有明確檢索意圖"," 的問題。",[11,791,792],{},"流程簡述：",[251,794,795,802,808,815,829],{},[85,796,797,798,801],{},"模型將使用者口語 ",[25,799,800],{},"改寫為書中可能出現的術語","（System Prompt 中的「Optimize Search Queries」）；",[85,803,804,805,206],{},"呼叫 Tool，傳入 ",[154,806,807],{},"question",[85,809,810,811,814],{},"將所有片段摘要依 Token 預算 
",[25,812,813],{},"分批","（單批約 3 萬 Token，最多 5 批）；",[85,816,817,818,821,822,825,826,206],{},"每批發起一次 ",[25,819,820],{},"獨立的 LLM 請求","，從 ",[154,823,824],{},"{ id, title, summary }"," 清單中選出相關片段 ID（最多 5 個），回傳 JSON，形如 ",[154,827,828],{},"{\"Thinking\":\"...\",\"answer\":[\"1\",\"3\"]}",[85,830,831,832,834],{},"依選中 Segment 的 span，從 Spine ",[25,833,267],{},"（不是摘要），作為 Tool 結果回傳。",[11,836,837,840,841,844],{},[25,838,839],{},"關鍵設計：Tool 回傳原文，而非摘要。"," 模型作答時看到的是真實段落 + 內嵌 ",[154,842,843],{},"[f…]","，避免「摘要 → 再概括」帶來的漂移。",[54,846,848,849,852],{"id":847},"_62-get_full_book_segment_summaries-全書概覽類問題","6.2 ",[154,850,851],{},"get_full_book_segment_summaries"," —— 全書概覽類問題",[11,854,855,856,789],{},"適用於：「總結全書」「點評這本書」「整體結構/主題」等 ",[25,857,858],{},"需要全局視野",[11,860,861,862,865],{},"依閱讀順序拼接所有片段的 ",[154,863,864],{},"summary"," 回傳，避免逐段相關度篩選遺漏關鍵章節。",[54,867,869],{"id":868},"_63-system-prompt書優先工具優先","6.3 System Prompt：書優先、工具優先",[11,871,872,873,878],{},"綁定書籍時，System Prompt 注入 ",[25,874,875],{},[691,876,877],{},"Core Principles for Reading Assistant","，核心三條：",[613,880,883],{"className":881,"code":882,"language":618},[616],"1. Book First, Tool First\n   - 任何可能與書籍相關的問題，必須先呼叫工具檢索；\n   - 答案必須主要依據檢索結果，禁止不檢索就編造「書中內容」。\n\n2. General Knowledge as Fallback Only\n   - 僅當：純閒聊 / 使用者明確要求不用書 / 工具無結果時，才可使用通用知識；\n   - 若書中沒有，必須先聲明「書中未提及此內容」，再補充通用知識。\n\n3. 
Direct Style\n   - 直入主題，禁止「根據提供的材料…」「綜上所述…」等套話。\n",[154,884,882],{"__ignoreMap":619},[11,886,887,888,891,892,895],{},"生成層實作標準 Tool 迴圈：",[154,889,890],{},"tool_calls"," → 執行 executor → 追加 ",[154,893,894],{},"role: tool"," → 繼續請求，直到輸出最終文字。啟用 tools 時關閉 thinking 通道，避免與 function call 協定衝突。",[34,897],{},[37,899,901],{"id":900},"七前端溯源從角標到原文高亮","七、前端溯源：從角標到原文高亮",[11,903,904,905,907],{},"模型輸出的 ",[154,906,653],{}," 不會直接展示，在渲染層轉為可點擊引用。",[54,909,911],{"id":910},"_71-角標渲染","7.1 角標渲染",[11,913,914,915,918],{},"展示前將位置標記規範化為 Markdown 連結，例如 ",[154,916,917],{},"[1]([f5-123-165])","，再渲染為序號角標；同一位置多次出現時可去重，避免 UI 堆疊。",[54,920,922],{"id":921},"_72-點擊互動","7.2 點擊互動",[251,924,925,934,940],{},[85,926,927,930,931,933],{},[25,928,929],{},"首次點擊","：解析 ",[154,932,843],{}," → 取 fileIndex 與字元偏移 → 從 Spine 原文提取文字 → 彈出預覽（可帶目錄標題）；",[85,935,936,939],{},[25,937,938],{},"再次點擊同一角標","：關閉彈窗；",[85,941,942,945],{},[25,943,944],{},"確認跳轉","：開啟閱讀視圖，依字元區間高亮。",[11,947,948,949,952,953,232],{},"從模型複製的標記到使用者看到的原文，中間 ",[25,950,951],{},"不經 LLM 二次加工","，溯源鏈路全程 ",[25,954,955],{},"確定、可重現",[34,957],{},[37,959,961],{"id":960},"八邊界情況與誠實降級","八、邊界情況與誠實降級",[11,963,964,965,536],{},"零幻覺不等於「永遠有答案」，而是 ",[25,966,967],{},"沒有證據時不瞎編",[281,969,970,980],{},[284,971,972],{},[287,973,974,977],{},[290,975,976],{},"場景",[290,978,979],{},"行為",[303,981,982,990,1002,1010,1018],{},[287,983,984,987],{},[308,985,986],{},"片段摘要尚未產生",[308,988,989],{},"先提取全文做摘要",[287,991,992,995],{},[308,993,994],{},"Tool 檢索無結果",[308,996,997,998,1001],{},"回傳 ",[154,999,1000],{},"(No relevant segment excerpts found…)","，模型應聲明書中未提及",[287,1003,1004,1007],{},[308,1005,1006],{},"模型輸出了非法兩段位標記",[308,1008,1009],{},"前端過濾，不展示無效角標",[287,1011,1012,1015],{},[308,1013,1014],{},"使用者純閒聊",[308,1016,1017],{},"System Prompt 
允許脫離書籍，用通用知識回答",[287,1019,1020,1023],{},[308,1021,1022],{},"匯出對話",[308,1024,1025],{},"可將角標轉為閱讀器深連結，便於分享或歸檔",[11,1027,1028],{},[14,1029],{"alt":1030,"src":1031},"對話匯出","https://cdn.linghuxiong.com/resources/snapshots/ai-chat-export.png",[34,1033],{},[37,1035,1037],{"id":1036},"九設計取捨為什麼不用向量-rag","九、設計取捨：為什麼不用「向量 RAG」？",[11,1039,1040,1041,1044],{},"做文件 QA 的同行常會問：既然要做檢索增強，為什麼不走 ",[25,1042,1043],{},"Embedding + 向量庫 Top-K"," 這條標準路線？",[11,1046,1047,1048,1051,1052,1055,1056,1059,1060,1063,1064,1067],{},"實際上 ",[25,1049,1050],{},"我們也在做 RAG","——每次回答前都會先查書、再生成。差別在於：社群語境裡的 RAG 往往預設包含 ",[25,1053,1054],{},"向量化與相似度檢索","；當前方案是 ",[25,1057,1058],{},"「片段索引 + Tool 按需拉原文」","（階段三），",[25,1061,1062],{},"刻意不引入向量層","。以下從 ",[25,1065,1066],{},"架構約束"," 說明取捨，並非否定向量 RAG 的價值。",[54,1069,1071],{"id":1070},"界定範圍不是不用檢索而是不用向量檢索","界定範圍：不是不用檢索，而是不用「向量檢索」",[82,1073,1074,1083],{},[85,1075,1076,1079,1080,232],{},[25,1077,1078],{},"廣義 RAG","：檢索相關材料 → 再生成 → ",[25,1081,1082],{},"我們在做",[85,1084,1085,1088,1089,232],{},[25,1086,1087],{},"向量 RAG","：召回依賴 Embedding 相似度 → ",[25,1090,1091],{},"當前版本不做",[11,1093,1094,1095,1098,1099,1102],{},"全書預處理為 ",[25,1096,1097],{},"片段摘要索引","；提問時模型透過 Tool 選段，再 ",[25,1100,1101],{},"回傳原文","。檢索增強存在，但不依賴單獨的 embedding 模型與向量索引維護。",[34,1104],{},[54,1106,1108],{"id":1107},"原因一支援自訂-llm-provider設定鏈路要盡量短","原因一：支援自訂 LLM Provider，設定鏈路要盡量短",[11,1110,1111,1112,1115,1116,1119],{},"產品允許使用者自由接入 ",[25,1113,1114],{},"自有 API Key","、自訂 Base URL，或使用 ",[25,1117,1118],{},"本機 Ollama","——對話模型由使用者自選，成本和資料路徑可控。這對許多自託管、多模型對比的場景是硬需求。",[11,1121,1122],{},"疊加典型向量 RAG 後，整合面會明顯變寬：",[82,1124,1125,1136,1139],{},[85,1126,1127,1128,1131,1132,1135],{},"除 ",[25,1129,1130],{},"Chat 模型"," 外，通常還需 ",[25,1133,1134],{},"Embedding 模型","（另一個 model name，有時還是另一個 endpoint）；",[85,1137,1138],{},"Ollama 等本機部署還要單獨拉 embedding 模型，並處理維度、介面相容；",[85,1140,1141,1142,1145],{},"故障域變複雜：Chat 正常但 ",[25,1143,1144],{},"檢索為空"," 時，可能是 embedding、索引或維度不一致，排查成本高於「單 Provider 全鏈路」。",[11,1147,1148,1149,1152,1153,1156],{},"當前方案裡，",[25,1150,1151],{},"選段與作答共用同一套 
Provider 設定","，避免「Chat 用 A、建索引用 B」。若你在做 ",[25,1154,1155],{},"可插拔 LLM"," 的應用，這往往比多幾個點的召回率更重要。",[11,1158,1159],{},[14,1160],{"alt":1161,"src":1162},"自訂 AI 服務商","https://cdn.linghuxiong.com/resources/snapshots/ai-customize-providers.png",[34,1164],{},[54,1166,1168],{"id":1167},"原因二embedding-與索引強綁定切換-provider-成本高","原因二：Embedding 與索引強綁定，切換 Provider 成本高",[11,1170,1171,1172,1175,1176,1179,1180,1183,1184,1187],{},"向量 RAG 裡常被低估的一點：",[25,1173,1174],{},"向量不是通用中間格式，而是某個 embedding 模型下的座標。"," 建庫用模型 A、查詢用模型 B 時，相似度通常 ",[25,1177,1178],{},"不可比","——換模型往往意味著 ",[25,1181,1182],{},"全書重新向量化","，且不同模型的 ",[25,1185,1186],{},"向量維度","（768 / 1024 / 1536 …）會綁死儲存 schema。",[11,1189,1190,1191,1194,1195,1198],{},"階段三持久化的是 ",[25,1192,1193],{},"結構化摘要 + 字元 span","，不存向量；切換 Chat 模型時 ",[25,1196,1197],{},"無需重建索引","，證據鏈（原文位置）不變。這與「使用者隨時對比不同 LLM」的目標更一致。",[34,1200],{},[54,1202,1204],{"id":1203},"原因三有目錄的長文件結構化路由往往已夠用","原因三：有目錄的長文件，結構化路由往往已夠用",[11,1206,1207,1208,1211,1212,1215,1216,1219,1220,1226],{},"電子書、PDF 通常有 ",[25,1209,1210],{},"章節結構","；預處理已產出 ",[25,1213,1214],{},"段標題 + 摘要","。對「某一章講了什麼」「書中如何定義某概念」類問題，在摘要目錄上選段再 ",[25,1217,1218],{},"拉回原文","，實務中效果穩定；且 Tool 回傳的是 ",[25,1221,1222,1223,1225],{},"帶 ",[154,1224,843],{}," 的原文","，零幻覺仍錨定在字元 span 上。",[11,1228,1229,1230,1233,1234,1237],{},"向量檢索在語意模糊、跨語言、長段落字面匹配等場景仍有優勢；在 ",[25,1231,1232],{},"有 TOC、可預處理、要強溯源"," 的閱讀器裡，優先把複雜度放在 ",[25,1235,1236],{},"Tool + 原文回傳 + 引用約束"," 上，ROI 通常更高。",[34,1239],{},[54,1241,1243],{"id":1242},"後續方向混合召回而非推倒重來","後續方向：混合召回，而非推倒重來",[11,1245,1246,1247,1250,1251,1254,1255,1258,1259,1262],{},"不排除將來增加 ",[25,1248,1249],{},"向量粗召回","（例如 embedding 只篩 Top-N 候選章節），最終仍走 ",[25,1252,1253],{},"選段 → 原文回傳 → 可點擊溯源","，零幻覺規則不變。若引入，會盡量滿足：Embedding ",[25,1256,1257],{},"可選","、換模型時 ",[25,1260,1261],{},"顯式提示重建索引","，避免 silent wrong retrieval。",[11,1264,1265,1266,232],{},"在此之前，優先保證：",[25,1267,1268],{},"任意 OpenAI 相容 Chat API 即可工作，換 Chat 
模型不必重建本機索引",[34,1270],{},[37,1272,1274],{"id":1273},"十小結","十、小結",[281,1276,1277,1290],{},[284,1278,1279],{},[287,1280,1281,1284,1287],{},[290,1282,1283],{},"環節",[290,1285,1286],{},"手段",[290,1288,1289],{},"作用",[303,1291,1292,1303,1316,1329,1340,1351],{},[287,1293,1294,1297,1300],{},[308,1295,1296],{},"預處理",[308,1298,1299],{},"依目錄/長度切分 + 片段摘要快取",[308,1301,1302],{},"長書可檢索、可定位",[287,1304,1305,1308,1313],{},[308,1306,1307],{},"位置標記",[308,1309,1310,1312],{},[154,1311,156],{}," 寫入原文",[308,1314,1315],{},"出處可機器解析",[287,1317,1318,1321,1326],{},[308,1319,1320],{},"Tool 檢索",[308,1322,1323,1324],{},"依問題查片段/全書摘要，回傳 ",[25,1325,322],{},[308,1327,1328],{},"作答前強制取證",[287,1330,1331,1334,1337],{},[308,1332,1333],{},"System Prompt",[308,1335,1336],{},"書優先、禁止偽造角標、查不到要說",[308,1338,1339],{},"約束生成行為",[287,1341,1342,1345,1348],{},[308,1343,1344],{},"前端溯源",[308,1346,1347],{},"角標 → 預覽 → 跳轉高亮",[308,1349,1350],{},"使用者可核驗證據",[287,1352,1353,1356,1359],{},[308,1354,1355],{},"不用向量檢索",[308,1357,1358],{},"單 Provider、換 Chat 模型無需重建索引",[308,1360,1361],{},"降低整合與遷移成本",[11,1363,1364,1365,1368],{},"「零幻覺」不是指望模型從不犯錯，而是 ",[25,1366,1367],{},"用工程結構把輸出鎖在證據鏈上","：沒有檢索結果就不應冒充書中內容；有檢索結果則應給出可核驗的原文位置。",[11,1370,1371,1372,1375,1376,1379],{},"若你也在做 AI 閱讀或文件 QA，希望 ",[25,1373,1374],{},"全文直塞 → 關鍵句提取 → Tool-First 按需檢索"," 這條演進路徑，以及 ",[25,1377,1378],{},"內嵌位置標記 + 原文回傳"," 的做法，能作為可參考的一種實作。",[19,1381,1382],{},[11,1383,1384,1385,1390,1391,1395],{},"以上是我們在開發 ",[243,1386,1389],{"href":1387,"rel":1388},"https://reader.linghuxiong.com",[247],"令狐兄","（Foxycape）AI 閱讀器過程中的實踐心得，僅供參考。文末可前往 ",[243,1392,1394],{"href":1393},"/zh-tw#download","下載頁面"," 
體驗閱讀器。",{"title":619,"searchDepth":1397,"depth":1397,"links":1398},2,[1399,1405,1406,1407,1408,1412,1419,1423,1424,1431],{"id":39,"depth":1397,"text":40,"children":1400},[1401,1403,1404],{"id":56,"depth":1402,"text":57},3,{"id":138,"depth":1402,"text":139},{"id":235,"depth":1402,"text":236},{"id":426,"depth":1397,"text":427},{"id":487,"depth":1397,"text":488},{"id":502,"depth":1397,"text":503},{"id":633,"depth":1397,"text":634,"children":1409},[1410,1411],{"id":657,"depth":1402,"text":658},{"id":683,"depth":1402,"text":684},{"id":759,"depth":1397,"text":760,"children":1413},[1414,1416,1418],{"id":777,"depth":1402,"text":1415},"6.1 get_related_segment_summaries —— 針對具體問題查片段",{"id":847,"depth":1402,"text":1417},"6.2 get_full_book_segment_summaries —— 全書概覽類問題",{"id":868,"depth":1402,"text":869},{"id":900,"depth":1397,"text":901,"children":1420},[1421,1422],{"id":910,"depth":1402,"text":911},{"id":921,"depth":1402,"text":922},{"id":960,"depth":1397,"text":961},{"id":1036,"depth":1397,"text":1037,"children":1425},[1426,1427,1428,1429,1430],{"id":1070,"depth":1402,"text":1071},{"id":1107,"depth":1402,"text":1108},{"id":1167,"depth":1402,"text":1168},{"id":1203,"depth":1402,"text":1204},{"id":1242,"depth":1402,"text":1243},{"id":1273,"depth":1397,"text":1274},null,"2026-06-03","分享 AI 閱讀器零幻覺問答的工程實作：回答嚴格基於當前書籍原文，關鍵論述可一鍵溯源到具體段落。",false,"md",{},true,"/zh-tw/blog/zero-hallucination-qa",{"title":6,"description":1434},"zh-tw/blog/zero-hallucination-qa",[1443,1444,1445],"閱讀器","AI","技術","zero-hallucination-qa","FAS1XA39T3l_OReLlkcSgPiyXgco_dsQVTaKMAAL-oU",1780489852792]