[{"data":1,"prerenderedAt":1459},["ShallowReactive",2],{"blog-list-zh-hk":3},[4],{"id":5,"title":6,"body":7,"config":1450,"date":1450,"description":16,"draft":1451,"extension":1452,"image":1450,"meta":1453,"navigation":1454,"path":1455,"seo":1456,"stem":1457,"tags":1450,"toolbar":1450,"translationKey":1450,"updated":1450,"__hash__":1458},"blog/zh-hk/blog/zero-hallucination-qa.md","Zero Hallucination Qa",{"type":8,"value":9,"toc":1414},"minimark",[10,13,17,30,32,39,54,56,61,68,73,78,96,101,112,117,151,154,158,176,183,187,202,207,244,251,255,268,293,298,415,433,440,442,446,453,468,475,495,501,503,507,510,516,518,522,545,555,616,619,630,640,647,649,653,660,666,673,677,684,692,699,703,713,756,767,773,775,779,793,801,808,811,853,863,871,877,884,888,897,903,914,916,920,926,930,937,941,964,974,976,980,986,1044,1050,1052,1056,1063,1086,1090,1110,1121,1123,1127,1138,1141,1164,1175,1181,1183,1187,1206,1217,1219,1223,1245,1256,1258,1262,1281,1287,1289,1293,1380,1387,1398],[11,12],"hr",{},[14,15,16],"p",{},"title: 我是如何實現閱讀器「零幻覺」問答的\ndescription: 分享 AI 閱讀器零幻覺問答的工程實作：回答嚴格基於當前書籍原文，關鍵論述可一鍵溯源到具體段落。\ndate: 2026-06-03\nupdated: 2026-06-03\ntranslationKey: zero-hallucination-qa\ntags:",[18,19,20,24,27],"ul",{},[21,22,23],"li",{},"閱讀器",[21,25,26],{},"AI",[21,28,29],{},"技術\ndraft: false",[11,31],{},[14,33,34],{},[35,36],"img",{"alt":37,"src":38},"封面：零幻覺問答","https://cdn.linghuxiong.com/resources/snapshots/ai-chat-cover.png",[40,41,42],"blockquote",{},[14,43,44,45,49,50,53],{},"本文分享 AI 閱讀器 ",[46,47,48],"strong",{},"零幻覺問答"," 的工程實作：回答嚴格基於當前書籍原文，關鍵論述可 ",[46,51,52],{},"一鍵溯源"," 到具體段落。若你也在做 AI 閱讀、文件 QA 或 RAG 類應用，希望三次迭代的經驗與最終架構能有所參考。",[11,55],{},[57,58,60],"h2",{"id":59},"一實踐歷程三個階段的演進","一、實踐歷程：三個階段的演進",[14,62,63,64,67],{},"零幻覺問答並非一開始就設計完備，而是在 ",[46,65,66],{},"成本、延遲和準確率"," 
的拉扯中逐步演進。以下依時間順序回顧三個階段，便於理解當前架構為何長成這樣。",[69,70],"mermaid",{":config":71,"code":72},"config","flowchart%20LR%0A%20%20%20%20P1%5B%E9%9A%8E%E6%AE%B5%E4%B8%80%EF%BC%9A%E5%85%A8%E6%96%87%E7%9B%B4%E5%A1%9E%5D%20--%3E%20P2%5B%E9%9A%8E%E6%AE%B5%E4%BA%8C%EF%BC%9ALLM%20%E6%8F%90%E5%8F%96%E9%97%9C%E9%8D%B5%E5%8F%A5%5D%0A%20%20%20%20P2%20--%3E%20P3%5B%E9%9A%8E%E6%AE%B5%E4%B8%89%EF%BC%9A%E7%89%87%E6%AE%B5%E7%B4%A2%E5%BC%95%20%2B%20Tool%20%E6%AA%A2%E7%B4%A2%5D%0A%20%20%20%20P1%20-.-%3E%7C%E6%85%A2%E3%80%81%E8%B2%B4%E3%80%81%E9%95%B7%E6%9B%B8%E4%B8%8D%E6%BA%96%7C%20X1%5B%E6%B7%98%E6%B1%B0%5D%0A%20%20%20%20P2%20-.-%3E%7C%E4%B8%9F%E7%B4%B0%E7%AF%80%E3%80%81%E4%BB%8D%E5%81%8F%E6%85%A2%7C%20X2%5B%E6%B7%98%E6%B1%B0%5D%0A%20%20%20%20P3%20--%3E%7C%E7%95%B6%E5%89%8D%E6%96%B9%E6%A1%88%7C%20OK%5B%E9%9B%B6%E5%B9%BB%E8%A6%BA%20%2B%20%E5%8F%AF%E6%BA%AF%E6%BA%90%5D",[74,75,77],"h3",{"id":76},"階段一全文直塞-context最簡單也最先暴露問題","階段一：全文直塞 Context（最簡單，也最先暴露問題）",[14,79,80,83,84,87,88,91,92,95],{},[46,81,82],{},"做法："," 用戶開啟一本書提問時，將提取出的 ",[46,85,86],{},"全部正文"," 放進 System Prompt 或 User 訊息，交給對話模型作答。若全書超過約 ",[46,89,90],{},"40 萬字元","，則 ",[46,93,94],{},"硬截斷","——只保留前面一段，後續章節對模型不可見。",[14,97,98],{},[46,99,100],{},"優點：",[18,102,103,106,109],{},[21,104,105],{},"實作成本極低，幾乎不需要預處理；",[21,107,108],{},"短書、結構簡單的文件效果尚可——模型確實「看到了整本書」；",[21,110,111],{},"互動簡單：問就能答，沒有「請先等待分析」的等待狀態。",[14,113,114],{},[46,115,116],{},"缺點（很快變得不可接受）：",[18,118,119,125,131,141],{},[21,120,121,124],{},[46,122,123],{},"回應慢","：每次提問都要把海量文字送進模型，首 Token 延遲和總耗時隨書長線性惡化；",[21,126,127,130],{},[46,128,129],{},"Token 成本高","：同一本書每問一次就重複付一遍全文的輸入費用；",[21,132,133,136,137,140],{},[46,134,135],{},"長書嚴重失真","：超過 40 萬字元後被截斷，後半本、附錄、結論章節等於不存在，且 UI 往往 ",[46,138,139],{},"沒有明確告知"," 已截斷；",[21,142,143,146,147,150],{},[46,144,145],{},"檢索粒度為零","：模型要在幾十萬字裡「大海撈針」，容易漏細節，也更容易產生 ",[46,148,149],{},"看似合理、實則無據"," 的概括——閱讀場景最忌諱這類幻覺。",[14,152,153],{},"階段一適合驗證 MVP，不適合作為產品級方案。",[74,155,157],{"id":156},"階段二用輕量-llm-提取關鍵句壓縮-context但壓得太狠","階段二：用輕量 LLM 提取關鍵句（壓縮 
Context，但壓得太狠）",[14,159,160,162,163,166,167,170,171,175],{},[46,161,82],{}," 在提問前（或首次開啟書時），用 ",[46,164,165],{},"成本更低的模型"," 對正文做一輪預處理：依 Spine 分章（或整書分段），抽取 ",[46,168,169],{},"關鍵句","，輸出時保留 ",[172,173,174],"code",{},"[f檔案-起始-結束]"," 形式的位置標記，再將摘錄拼成較短文字，作為後續問答的 Context。",[14,177,178,179,182],{},"典型鏈路是 ",[46,180,181],{},"Extract → Cache → Chat","：先離線或按需跑一遍提取並落庫，之後每次提問複用同一份「關鍵句合集」。這與許多文件 QA 原型裡「先壓縮文件、再拿壓縮結果做 QA」的思路相同，也是我們在階段二實際採用過的路線。",[14,184,185],{},[46,186,100],{},[18,188,189,196,199],{},[21,190,191,192,195],{},"每次提問送入模型的文字 ",[46,193,194],{},"明顯縮短","，單次 Token 消耗較階段一顯著下降；",[21,197,198],{},"預處理結果可快取，同一本書不必每次提問都重新提取；",[21,200,201],{},"已引入位置標記，為後續溯源打下基礎。",[14,203,204],{},[46,205,206],{},"缺點（長書場景下依然扛不住）：",[18,208,209,215,225,234],{},[21,210,211,214],{},[46,212,213],{},"細節大量丟失","：「關鍵句」由模型主觀篩選，論證鏈上的限定條件、反例等容易被丟掉，答案容易「正確但片面」；",[21,216,217,220,221,224],{},[46,218,219],{},"長書 Context 仍然偏大","：大部頭作品即便只留關鍵句，拼接後的輸入依然可觀，",[46,222,223],{},"延遲和成本只是緩解，沒有根治","；",[21,226,227,230,231,224],{},[46,228,229],{},"雙重 LLM 誤差","：提取階段可能漏選，問答階段又可能誤讀摘錄，錯誤會 ",[46,232,233],{},"疊加",[21,235,236,239,240,243],{},[46,237,238],{},"靜態 Context","：無論用戶問的是某一章細節還是全書結構，送進模型的都是 ",[46,241,242],{},"同一份預提取文字","，無法依問題動態收窄範圍。",[14,245,246,247,250],{},"這一階段的教訓很明確：",[46,248,249],{},"問題不在「有沒有壓縮」，而在「壓縮是否按需、以及能否回到原文」","。",[74,252,254],{"id":253},"階段三片段索引-tool-按需檢索-原文回傳當前方案","階段三：片段索引 + Tool 按需檢索 + 原文回傳（當前方案）",[14,256,257,259,260,267],{},[46,258,82],{}," 基本思路參考了 ",[261,262,266],"a",{"href":263,"rel":264},"https://github.com/VectifyAI/PageIndex",[265],"nofollow","PageIndex","，相對階段二，核心變化有三點：",[269,270,271,277,287],"ol",{},[21,272,273,276],{},[46,274,275],{},"預處理產物是結構化索引","（目錄級摘要 + 精確字元 span），而不是把摘錄直接當作問答 Context；",[21,278,279,282,283,286],{},[46,280,281],{},"每次提問由模型透過 Tool Calling 按需檢索","，再 ",[46,284,285],{},"拉取帶位置標記的原文"," 作答；",[21,288,289,292],{},[46,290,291],{},"System Prompt 
與前端聯動","，約束引用格式，並支援點擊角標跳轉、高亮原文。",[14,294,295],{},[46,296,297],{},"三階段對比：",[299,300,301,320],"table",{},[302,303,304],"thead",{},[305,306,307,311,314,317],"tr",{},[308,309,310],"th",{},"維度",[308,312,313],{},"階段一（全文直塞）",[308,315,316],{},"階段二（關鍵句提取）",[308,318,319],{},"階段三（當前）",[321,322,323,342,356,370,384,401],"tbody",{},[305,324,325,329,332,335],{},[326,327,328],"td",{},"單次提問 Context",[326,330,331],{},"全書（或截斷後的前半本）",[326,333,334],{},"預提取關鍵句合集",[326,336,337,338,341],{},"僅與問題相關的少量 ",[46,339,340],{},"原文"," 片段",[305,343,344,347,350,353],{},[326,345,346],{},"長書準確性",[326,348,349],{},"超 40 萬字元後嚴重下降",[326,351,352],{},"依賴提取品質，易丟細節",[326,354,355],{},"依目錄/span 檢索，不受全書長度硬截斷",[305,357,358,361,364,367],{},[326,359,360],{},"回應速度",[326,362,363],{},"慢",[326,365,366],{},"略好，長書仍慢",[326,368,369],{},"檢索 + 短 Context，明顯更快",[305,371,372,375,378,381],{},[326,373,374],{},"Token 成本",[326,376,377],{},"極高",[326,379,380],{},"中等偏高",[326,382,383],{},"預處理攤銷 + 按需付費",[305,385,386,389,392,395],{},[326,387,388],{},"溯源能力",[326,390,391],{},"弱（難標註出處）",[326,393,394],{},"有位置標記，但內容已是二次篩選",[326,396,397,398],{},"角標對應 ",[46,399,400],{},"真實原文 span",[305,402,403,406,409,412],{},[326,404,405],{},"工程複雜度",[326,407,408],{},"低",[326,410,411],{},"中",[326,413,414],{},"高",[14,416,417,420,421,424,425,428,429,432],{},[46,418,419],{},"為何停在階段三："," 閱讀場景的零幻覺，關鍵不是「讓模型看過盡量多的字」，而是 ",[46,422,423],{},"「作答前必須拿到與問題相關的原文證據」","。階段一、二都在 Context ",[46,426,427],{},"體積"," 上做文章；階段三把鏈路拆成 ",[46,430,431],{},"「索引（預處理）→ 檢索（Tool）→ 取證（原文）→ 作答（約束生成）」","，才同時兼顧準確率、成本與可溯源性。",[14,434,435,436,439],{},"下文展開 ",[46,437,438],{},"階段三"," 的實作細節。",[11,441],{},[57,443,445],{"id":444},"二問題定義閱讀場景下幻覺比普通-chat-更致命","二、問題定義：閱讀場景下，幻覺比普通 Chat 更致命",[14,447,448,449,452],{},"普通 ChatBot 偶發錯誤，用戶往往可以容忍。但在 ",[46,450,451],{},"書籍 QA"," 裡，幻覺的代價更高：",[18,454,455,462,465],{},[21,456,457,458,461],{},"用戶問的是 ",[46,459,460],{},"這本書"," 說了什麼，不是問模型的 parametric 
memory；",[21,463,464],{},"一句似是而非的「書中觀點」，可能誤導筆記、引用甚至二次傳播；",[21,466,467],{},"沒有出處，用戶無法核實，產品信任很難建立。",[14,469,470,471,474],{},"因此，「零幻覺」在工程上落地為三條 ",[46,472,473],{},"可執行"," 的規則：",[269,476,477,483,489],{},[21,478,479,482],{},[46,480,481],{},"書內問題必須先查書","：凡可能與當前書籍相關的問題，模型必須先走檢索（Tool），再組織答案；",[21,484,485,488],{},[46,486,487],{},"答案必須可溯源","：關鍵結論附帶原文位置標記，前端可解析並跳轉高亮；",[21,490,491,494],{},[46,492,493],{},"查不到就說查不到","：書中沒有的內容應明確告知，而不是用通用知識冒充「書中觀點」。",[14,496,497,498,500],{},"下文依 ",[46,499,438],{}," 的數據流，說明上述規則如何落地。",[11,502],{},[57,504,506],{"id":505},"三整體架構預處理-工具檢索-約束生成-可點擊溯源","三、整體架構：預處理 → 工具檢索 → 約束生成 → 可點擊溯源",[69,508],{":config":71,"code":509},"flowchart%20TB%0A%20%20%20%20subgraph%20prep%20%5B%E9%9B%A2%E7%B7%9A%2F%E9%A6%96%E6%AC%A1%E9%A0%90%E8%99%95%E7%90%86%5D%0A%20%20%20%20%20%20%20%20A%5B%E4%BE%9D%E7%9B%AE%E9%8C%84%E6%88%96%E9%95%B7%E5%BA%A6%E5%88%87%E5%88%86%E5%85%A8%E6%9B%B8%5D%20--%3E%20B%5BLLM%20%E7%94%A2%E7%94%9F%E7%89%87%E6%AE%B5%E6%91%98%E8%A6%81%5D%0A%20%20%20%20%20%20%20%20B%20--%3E%20C%5B%E6%9C%AC%E6%A9%9F%E6%8C%81%E4%B9%85%E5%8C%96%20Segment%20%E5%BF%AB%E5%8F%96%5D%0A%20%20%20%20end%0A%0A%20%20%20%20subgraph%20ask%20%5B%E7%94%A8%E6%88%B6%E6%8F%90%E5%95%8F%5D%0A%20%20%20%20%20%20%20%20D%5B%E7%94%A8%E6%88%B6%E8%BC%B8%E5%85%A5%E5%95%8F%E9%A1%8C%5D%20--%3E%20E%7B%E5%B7%B2%E6%9C%89%20Segment%20%E5%BF%AB%E5%8F%96%3F%7D%0A%20%20%20%20%20%20%20%20E%20--%3E%7C%E5%90%A6%7C%20F%5B%E6%8F%90%E5%8F%96%E5%85%A8%E6%96%87%20%2F%20%E8%A9%A2%E5%95%8F%E6%98%AF%E5%90%A6%E9%A0%90%E8%99%95%E7%90%86%5D%0A%20%20%20%20%20%20%20%20F%20--%3E%20prep%0A%20%20%20%20%20%20%20%20E%20--%3E%7C%E6%98%AF%7C%20G%5B%E8%A8%BB%E5%86%8A%20Tool%20Calling%5D%0A%20%20%20%20end%0A%0A%20%20%20%20subgraph%20retrieve%20%5B%E5%B7%A5%E5%85%B7%E6%AA%A2%E7%B4%A2%5D%0A%20%20%20%20%20%20%20%20G%20--%3E%20H%7B%E5%95%8F%E9%A1%8C%E9%A1%9E%E5%9E%8B%7D%0A%20%20%20%20%20%20%20%20H%20--%3E%7C%E5%85%A8%E6%9B%B8%E6%A6%82%E8%A6%BD%2F%E6%9B%B8%E8%A9%95%7C%20I%5Bget_full_book_segment_summaries%5D%0A%20%20%20%20%20%20%20%20H%20--
%3E%7C%E5%85%B7%E9%AB%94%E4%BA%8B%E5%AF%A6%2F%E4%BA%BA%E7%89%A9%2F%E7%AB%A0%E7%AF%80%7C%20J%5Bget_related_segment_summaries%5D%0A%20%20%20%20%20%20%20%20J%20--%3E%20K%5BLLM%20%E5%BE%9E%E6%91%98%E8%A6%81%E7%9B%AE%E9%8C%84%E4%B8%AD%E9%81%B8%E7%9B%B8%E9%97%9C%E7%89%87%E6%AE%B5%20ID%5D%0A%20%20%20%20%20%20%20%20K%20--%3E%20L%5B%E4%BE%9D%20span%20%E6%8B%89%E5%8F%96%E5%8E%9F%E6%96%87%20%2B%20%E4%BD%8D%E7%BD%AE%E6%A8%99%E8%A8%98%5D%0A%20%20%20%20%20%20%20%20I%20--%3E%20M%5B%E6%8B%BC%E6%8E%A5%E5%85%A8%E6%9B%B8%E7%89%87%E6%AE%B5%E6%91%98%E8%A6%81%5D%0A%20%20%20%20end%0A%0A%20%20%20%20subgraph%20answer%20%5B%E7%94%9F%E6%88%90%E8%88%87%E5%B1%95%E7%A4%BA%5D%0A%20%20%20%20%20%20%20%20L%20--%3E%20N%5BTool%20%E7%B5%90%E6%9E%9C%E5%9B%9E%E5%82%B3%E6%A8%A1%E5%9E%8B%5D%0A%20%20%20%20%20%20%20%20M%20--%3E%20N%0A%20%20%20%20%20%20%20%20N%20--%3E%20O%5BSystem%20Prompt%20%E7%B4%84%E6%9D%9F%E5%BC%95%E7%94%A8%E6%A0%BC%E5%BC%8F%5D%0A%20%20%20%20%20%20%20%20O%20--%3E%20P%5B%E4%B8%B2%E6%B5%81%E8%BC%B8%E5%87%BA%E7%AD%94%E6%A1%88%20%2B%20%E4%BD%8D%E7%BD%AE%E8%A7%92%E6%A8%99%5D%0A%20%20%20%20%20%20%20%20P%20--%3E%20Q%5B%E6%B8%B2%E6%9F%93%E5%8F%AF%E9%BB%9E%E6%93%8A%E5%BC%95%E7%94%A8%E8%A7%92%E6%A8%99%5D%0A%20%20%20%20%20%20%20%20Q%20--%3E%20R%5B%E9%BB%9E%E6%93%8A%20%E2%86%92%20%E9%A0%90%E8%A6%BD%E5%8E%9F%E6%96%87%20%E2%86%92%20%E8%B7%B3%E8%BD%89%E9%AB%98%E4%BA%AE%5D%0A%20%20%20%20end",[14,511,512,513,250],{},"核心思路可概括為：",[46,514,515],{},"不讓模型「憑記憶答題」，而是讓它「先取證、再作答、並標註出處」",[11,517],{},[57,519,521],{"id":520},"四預處理把整本書變成可檢索的片段索引","四、預處理：把整本書變成可檢索的「片段索引」",[14,523,524,525,528,529,532,533,536,537,540,541,544],{},"若每次提問仍採用 ",[46,526,527],{},"階段一"," 的全文 Context，長書必然爆 Token，檢索粒度也過粗。階段三的解法是：用戶首次對某本書發起 AI 對話時，背景非同步跑 ",[46,530,531],{},"片段摘要任務","，依 ",[46,534,535],{},"目錄結構"," 或 ",[46,538,539],{},"文字長度"," 將全書切成若干 ",[172,542,543],{},"Segment","，為每個片段產生摘要，並持久化到本機 IndexedDB。",[14,546,547,548,550,551,554],{},"每個 ",[172,549,543],{}," 在數據結構上包含摘要與 
",[46,552,553],{},"正文物理位置","：",[299,556,557,567],{},[302,558,559],{},[305,560,561,564],{},[308,562,563],{},"欄位",[308,565,566],{},"含義",[321,568,569,583,596,606],{},[305,570,571,580],{},[326,572,573,576,577],{},[172,574,575],{},"startFileIndex"," / ",[172,578,579],{},"endFileIndex",[326,581,582],{},"Spine 檔案索引（PDF 則每頁一個檔案）",[305,584,585,593],{},[326,586,587,576,590],{},[172,588,589],{},"startOffset",[172,591,592],{},"endOffset",[326,594,595],{},"字元級起迄偏移",[305,597,598,603],{},[326,599,600],{},[172,601,602],{},"sequence",[326,604,605],{},"線性閱讀順序",[305,607,608,613],{},[326,609,610],{},[172,611,612],{},"title",[326,614,615],{},"對應目錄標題",[14,617,618],{},"切分策略兼顧精度與成本：單一目錄正文不超過約 20KB 時只摘要該節點；同級目錄會合併成批（15KB～20KB）再呼叫 LLM；無目錄的大塊正文則依 3～4 萬字元區間切段。",[14,620,621,622,625,626,629],{},"摘要生成時的 System Prompt 會要求 ",[46,623,624],{},"保留原文位置標記","（格式 ",[172,627,628],{},"[f數字-數字-數字]","），以便後續 Tool 回傳原文時，位置資訊與 spine 字元偏移一致。核心約束如下：",[631,632,638],"pre",{"className":633,"code":635,"language":636,"meta":637},[634],"language-text","若摘要內容與原文某段相關，須保留段末位置資訊，格式 [f數字-數字-數字]（如 [f1-90-109]）。\n位置標記是整體，禁止修改、合併或省略其中的任何字元或數值。\n","text","",[172,639,635],{"__ignoreMap":637},[14,641,642,643,646],{},"預處理完成後，問答不再依賴「整書 Context」，而是依賴 ",[46,644,645],{},"結構化片段索引","——這是長書場景下零幻覺的工程前提。",[11,648],{},[57,650,652],{"id":651},"五位置標記體系把出處編碼進文字","五、位置標記體系：把「出處」編碼進文字",[14,654,655,656,659],{},"零幻覺不僅要求內容來自原文，還要求 ",[46,657,658],{},"出處可機器解析、可在 UI 中跳轉","。我們採用內嵌位置標記：",[631,661,664],{"className":662,"code":663,"language":636},[634],"[f{fileIndex}-{startChar}-{endChar}]\n",[172,665,663],{"__ignoreMap":637},[14,667,668,669,672],{},"例如 ",[172,670,671],{},"[f5-123-165]"," 表示：第 5 個 Spine 檔案（從 0 起算）中，字元偏移 123～165 的文字區間。",[74,674,676],{"id":675},"_51-標記如何寫入正文","5.1 標記如何寫入正文",[14,678,679,680,683],{},"正文提取層在輸出片段時，為每個小段在段末寫入 ",[172,681,682],{},"[f{fileIndex}-{start}-{end}]","。示意：",[631,685,690],{"className":686,"code":688,"language":689,"meta":637},[687],"language-typescript","const position = `[f${fileIndex}-${absOffset}-${absOffset + 
segment.length}]`;\nfileLines.push(segment.text.trim() + position);\n","typescript",[172,691,688],{"__ignoreMap":637},[14,693,694,695,698],{},"無論是預處理摘要還是 Tool 回傳的原文摘錄，位置資訊都與 ",[46,696,697],{},"Spine 字元偏移"," 對齊，而不是讓模型「估算頁碼」。",[74,700,702],{"id":701},"_52-對模型輸出的約束","5.2 對模型輸出的約束",[14,704,705,706,712],{},"組裝 System Prompt 時，我們單獨約定了 ",[46,707,708],{},[709,710,711],"span",{},"Position Citation Rules","，核心五條：",[269,714,715,725,735,741,750],{},[21,716,717,720,721,724],{},[46,718,719],{},"標準格式","：必須使用 ",[172,722,723],{},"[f{fileIndex}-{startChar}-{endChar}]","，三段數字缺一不可；",[21,726,727,730,731,734],{},[46,728,729],{},"只引用當前來源","：角標須 ",[46,732,733],{},"原樣複製"," 自本輪 System/User 訊息或 Tool 回傳文字中的標記；",[21,736,737,740],{},[46,738,739],{},"禁止偽造","：不得自行計算、修改或編造位置；",[21,742,743,746,747,224],{},[46,744,745],{},"寧缺毋濫","：當前上下文沒有合法標記時，正常作答即可，",[46,748,749],{},"不要輸出任何位置標記",[21,751,752,755],{},[46,753,754],{},"緊跟論述","：標記須緊跟相關句段，禁止在文末堆砌引用清單。",[14,757,758,759,762,763,766],{},"前端展示前還會過濾模型偶發輸出的 ",[46,760,761],{},"兩段位"," 非法標記（如 ",[172,764,765],{},"[f1-293]","），避免無效角標進入 UI。",[14,768,769],{},[35,770],{"alt":771,"src":772},"引用溯源彈窗","https://cdn.linghuxiong.com/resources/snapshots/ai-chat.png",[11,774],{},[57,776,778],{"id":777},"六tool-calling先檢索再回答","六、Tool Calling：先檢索，再回答",[14,780,781,782,785,786,789,790,250],{},"當對話綁定某本書（存在 ",[172,783,784],{},"resourceId","，且 ",[172,787,788],{},"chatType === 'chat'","）時，每次生成前會向模型註冊兩個 Tool，並掛載對應的 executor。整體遵循 OpenAI 相容的 ",[46,791,792],{},"function calling 迴圈",[74,794,796,797,800],{"id":795},"_61-get_related_segment_summaries-針對具體問題查片段","6.1 ",[172,798,799],{},"get_related_segment_summaries"," —— 針對具體問題查片段",[14,802,803,804,807],{},"適用於：概念、人物、情節、章節細節等 ",[46,805,806],{},"有明確檢索意圖"," 的問題。",[14,809,810],{},"流程簡述：",[269,812,813,820,826,833,847],{},[21,814,815,816,819],{},"模型將用戶口語 ",[46,817,818],{},"改寫為書中可能出現的術語","（System Prompt 中的「Optimize Search Queries」）；",[21,821,822,823,224],{},"呼叫 Tool，傳入 ",[172,824,825],{},"question",[21,827,828,829,832],{},"將所有片段摘要依 Token 預算 
",[46,830,831],{},"分批","（單批約 3 萬 Token，最多 5 批）；",[21,834,835,836,839,840,843,844,224],{},"每批發起一次 ",[46,837,838],{},"獨立的 LLM 請求","，從 ",[172,841,842],{},"{ id, title, summary }"," 清單中選出相關片段 ID（最多 5 個），回傳 JSON，形如 ",[172,845,846],{},"{\"Thinking\":\"...\",\"answer\":[\"1\",\"3\"]}",[21,848,849,850,852],{},"依選中 Segment 的 span，從 Spine ",[46,851,285],{},"（不是摘要），作為 Tool 結果回傳。",[14,854,855,858,859,862],{},[46,856,857],{},"關鍵設計：Tool 回傳原文，而非摘要。"," 模型作答時看到的是真實段落 + 內嵌 ",[172,860,861],{},"[f…]","，避免「摘要 → 再概括」帶來的漂移。",[74,864,866,867,870],{"id":865},"_62-get_full_book_segment_summaries-全書概覽類問題","6.2 ",[172,868,869],{},"get_full_book_segment_summaries"," —— 全書概覽類問題",[14,872,873,874,807],{},"適用於：「總結全書」「點評這本書」「整體結構/主題」等 ",[46,875,876],{},"需要全局視野",[14,878,879,880,883],{},"依閱讀順序拼接所有片段的 ",[172,881,882],{},"summary"," 回傳，避免逐段相關度篩選遺漏關鍵章節。",[74,885,887],{"id":886},"_63-system-prompt書優先工具優先","6.3 System Prompt：書優先、工具優先",[14,889,890,891,896],{},"綁定書籍時，System Prompt 注入 ",[46,892,893],{},[709,894,895],{},"Core Principles for Reading Assistant","，核心三條：",[631,898,901],{"className":899,"code":900,"language":636},[634],"1. Book First, Tool First\n   - 任何可能與書籍相關的問題，必須先呼叫工具檢索；\n   - 答案必須主要依據檢索結果，禁止不檢索就編造「書中內容」。\n\n2. General Knowledge as Fallback Only\n   - 僅當：純閒聊 / 用戶明確要求不用書 / 工具無結果時，才可使用通用知識；\n   - 若書中沒有，必須先聲明「書中未提及此內容」，再補充通用知識。\n\n3. 
Direct Style\n   - 直入主題，禁止「根據提供的材料…」「綜上所述…」等套話。\n",[172,902,900],{"__ignoreMap":637},[14,904,905,906,909,910,913],{},"生成層實作標準 Tool 迴圈：",[172,907,908],{},"tool_calls"," → 執行 executor → 追加 ",[172,911,912],{},"role: tool"," → 繼續請求，直到輸出最終文字。啟用 tools 時關閉 thinking 通道，避免與 function call 協定衝突。",[11,915],{},[57,917,919],{"id":918},"七前端溯源從角標到原文高亮","七、前端溯源：從角標到原文高亮",[14,921,922,923,925],{},"模型輸出的 ",[172,924,671],{}," 不會直接展示，在渲染層轉為可點擊引用。",[74,927,929],{"id":928},"_71-角標渲染","7.1 角標渲染",[14,931,932,933,936],{},"展示前將位置標記規範化為 Markdown 連結，例如 ",[172,934,935],{},"[1]([f5-123-165])","，再渲染為序號角標；同一位置多次出現時可去重，避免 UI 堆疊。",[74,938,940],{"id":939},"_72-點擊互動","7.2 點擊互動",[269,942,943,952,958],{},[21,944,945,948,949,951],{},[46,946,947],{},"首次點擊","：解析 ",[172,950,861],{}," → 取 fileIndex 與字元偏移 → 從 Spine 原文提取文字 → 彈出預覽（可帶目錄標題）；",[21,953,954,957],{},[46,955,956],{},"再次點擊同一角標","：關閉彈窗；",[21,959,960,963],{},[46,961,962],{},"確認跳轉","：開啟閱讀視圖，依字元區間高亮。",[14,965,966,967,970,971,250],{},"從模型複製的標記到用戶看到的原文，中間 ",[46,968,969],{},"不經 LLM 二次加工","，溯源鏈路全程 ",[46,972,973],{},"確定、可重現",[11,975],{},[57,977,979],{"id":978},"八邊界情況與誠實降級","八、邊界情況與誠實降級",[14,981,982,983,554],{},"零幻覺不等於「永遠有答案」，而是 ",[46,984,985],{},"沒有證據時不瞎編",[299,987,988,998],{},[302,989,990],{},[305,991,992,995],{},[308,993,994],{},"場景",[308,996,997],{},"行為",[321,999,1000,1008,1020,1028,1036],{},[305,1001,1002,1005],{},[326,1003,1004],{},"片段摘要尚未產生",[326,1006,1007],{},"先提取全文做摘要",[305,1009,1010,1013],{},[326,1011,1012],{},"Tool 檢索無結果",[326,1014,1015,1016,1019],{},"回傳 ",[172,1017,1018],{},"(No relevant segment excerpts found…)","，模型應聲明書中未提及",[305,1021,1022,1025],{},[326,1023,1024],{},"模型輸出了非法兩段位標記",[326,1026,1027],{},"前端過濾，不展示無效角標",[305,1029,1030,1033],{},[326,1031,1032],{},"用戶純閒聊",[326,1034,1035],{},"System Prompt 
允許脫離書籍，用通用知識回答",[305,1037,1038,1041],{},[326,1039,1040],{},"導出對話",[326,1042,1043],{},"可將角標轉為閱讀器深連結，便於分享或歸檔",[14,1045,1046],{},[35,1047],{"alt":1048,"src":1049},"對話導出","https://cdn.linghuxiong.com/resources/snapshots/ai-chat-export.png",[11,1051],{},[57,1053,1055],{"id":1054},"九設計取捨為什麼不用向量-rag","九、設計取捨：為什麼不用「向量 RAG」？",[14,1057,1058,1059,1062],{},"做文件 QA 的同行常會問：既然要做檢索增強，為什麼不走 ",[46,1060,1061],{},"Embedding + 向量庫 Top-K"," 這條標準路線？",[14,1064,1065,1066,1069,1070,1073,1074,1077,1078,1081,1082,1085],{},"實際上 ",[46,1067,1068],{},"我們也在做 RAG","——每次回答前都會先查書、再生成。差別在於：社群語境裡的 RAG 往往預設包含 ",[46,1071,1072],{},"向量化與相似度檢索","；當前方案是 ",[46,1075,1076],{},"「片段索引 + Tool 按需拉原文」","（階段三），",[46,1079,1080],{},"刻意不引入向量層","。以下從 ",[46,1083,1084],{},"架構約束"," 說明取捨，並非否定向量 RAG 的價值。",[74,1087,1089],{"id":1088},"界定範圍不是不用檢索而是不用向量檢索","界定範圍：不是不用檢索，而是不用「向量檢索」",[18,1091,1092,1101],{},[21,1093,1094,1097,1098,250],{},[46,1095,1096],{},"廣義 RAG","：檢索相關材料 → 再生成 → ",[46,1099,1100],{},"我們在做",[21,1102,1103,1106,1107,250],{},[46,1104,1105],{},"向量 RAG","：召回依賴 Embedding 相似度 → ",[46,1108,1109],{},"當前版本不做",[14,1111,1112,1113,1116,1117,1120],{},"全書預處理為 ",[46,1114,1115],{},"片段摘要索引","；提問時模型透過 Tool 選段，再 ",[46,1118,1119],{},"回傳原文","。檢索增強存在，但不依賴單獨的 embedding 模型與向量索引維護。",[11,1122],{},[74,1124,1126],{"id":1125},"原因一支援自訂-llm-provider設定鏈路要盡量短","原因一：支援自訂 LLM Provider，設定鏈路要盡量短",[14,1128,1129,1130,1133,1134,1137],{},"產品允許用戶自由接入 ",[46,1131,1132],{},"自有 API Key","、自訂 Base URL，或使用 ",[46,1135,1136],{},"本機 Ollama","——對話模型由用戶自選，成本和數據路徑可控。這對許多自託管、多模型對比的場景是硬需求。",[14,1139,1140],{},"疊加典型向量 RAG 後，整合面會明顯變寬：",[18,1142,1143,1154,1157],{},[21,1144,1145,1146,1149,1150,1153],{},"除 ",[46,1147,1148],{},"Chat 模型"," 外，通常還需 ",[46,1151,1152],{},"Embedding 模型","（另一個 model name，有時還是另一個 endpoint）；",[21,1155,1156],{},"Ollama 等本機部署還要單獨拉 embedding 模型，並處理維度、介面相容；",[21,1158,1159,1160,1163],{},"故障域變複雜：Chat 正常但 ",[46,1161,1162],{},"檢索為空"," 時，可能是 embedding、索引或維度不一致，排查成本高於「單 Provider 全鏈路」。",[14,1165,1166,1167,1170,1171,1174],{},"當前方案裡，",[46,1168,1169],{},"選段與作答共用同一套 
Provider 設定","，避免「Chat 用 A、建索引用 B」。若你在做 ",[46,1172,1173],{},"可插拔 LLM"," 的應用，這往往比多幾個點的召回率更重要。",[14,1176,1177],{},[35,1178],{"alt":1179,"src":1180},"自訂 AI 服務商","https://cdn.linghuxiong.com/resources/snapshots/ai-customize-providers.png",[11,1182],{},[74,1184,1186],{"id":1185},"原因二embedding-與索引強綁定切換-provider-成本高","原因二：Embedding 與索引強綁定，切換 Provider 成本高",[14,1188,1189,1190,1193,1194,1197,1198,1201,1202,1205],{},"向量 RAG 裡常被低估的一點：",[46,1191,1192],{},"向量不是通用中間格式，而是某個 embedding 模型下的座標。"," 建庫用模型 A、查詢用模型 B 時，相似度通常 ",[46,1195,1196],{},"不可比","——換模型往往意味著 ",[46,1199,1200],{},"全書重新向量化","，且不同模型的 ",[46,1203,1204],{},"向量維度","（768 / 1024 / 1536 …）會綁死儲存 schema。",[14,1207,1208,1209,1212,1213,1216],{},"階段三持久化的是 ",[46,1210,1211],{},"結構化摘要 + 字元 span","，不存向量；切換 Chat 模型時 ",[46,1214,1215],{},"無需重建索引","，證據鏈（原文位置）不變。這與「用戶隨時對比不同 LLM」的目標更一致。",[11,1218],{},[74,1220,1222],{"id":1221},"原因三有目錄的長文件結構化路由往往已夠用","原因三：有目錄的長文件，結構化路由往往已夠用",[14,1224,1225,1226,1229,1230,1233,1234,1237,1238,1244],{},"電子書、PDF 通常有 ",[46,1227,1228],{},"章節結構","；預處理已產出 ",[46,1231,1232],{},"段標題 + 摘要","。對「某一章講了什麼」「書中如何定義某概念」類問題，在摘要目錄上選段再 ",[46,1235,1236],{},"拉回原文","，實務中效果穩定；且 Tool 回傳的是 ",[46,1239,1240,1241,1243],{},"帶 ",[172,1242,861],{}," 的原文","，零幻覺仍錨定在字元 span 上。",[14,1246,1247,1248,1251,1252,1255],{},"向量檢索在語意模糊、跨語言、長段落字面匹配等場景仍有優勢；在 ",[46,1249,1250],{},"有 TOC、可預處理、要強溯源"," 的閱讀器裡，優先把複雜度放在 ",[46,1253,1254],{},"Tool + 原文回傳 + 引用約束"," 上，ROI 通常更高。",[11,1257],{},[74,1259,1261],{"id":1260},"後續方向混合召回而非推倒重來","後續方向：混合召回，而非推倒重來",[14,1263,1264,1265,1268,1269,1272,1273,1276,1277,1280],{},"不排除將來增加 ",[46,1266,1267],{},"向量粗召回","（例如 embedding 只篩 Top-N 候選章節），最終仍走 ",[46,1270,1271],{},"選段 → 原文回傳 → 可點擊溯源","，零幻覺規則不變。若引入，會盡量滿足：Embedding ",[46,1274,1275],{},"可選","、換模型時 ",[46,1278,1279],{},"顯式提示重建索引","，避免 silent wrong retrieval。",[14,1282,1283,1284,250],{},"在此之前，優先保證：",[46,1285,1286],{},"任意 OpenAI 相容 Chat API 即可工作，換 Chat 
模型不必重建本機索引",[11,1288],{},[57,1290,1292],{"id":1291},"十小結","十、小結",[299,1294,1295,1308],{},[302,1296,1297],{},[305,1298,1299,1302,1305],{},[308,1300,1301],{},"環節",[308,1303,1304],{},"手段",[308,1306,1307],{},"作用",[321,1309,1310,1321,1334,1347,1358,1369],{},[305,1311,1312,1315,1318],{},[326,1313,1314],{},"預處理",[326,1316,1317],{},"依目錄/長度切分 + 片段摘要快取",[326,1319,1320],{},"長書可檢索、可定位",[305,1322,1323,1326,1331],{},[326,1324,1325],{},"位置標記",[326,1327,1328,1330],{},[172,1329,174],{}," 寫入原文",[326,1332,1333],{},"出處可機器解析",[305,1335,1336,1339,1344],{},[326,1337,1338],{},"Tool 檢索",[326,1340,1341,1342],{},"依問題查片段/全書摘要，回傳 ",[46,1343,340],{},[326,1345,1346],{},"作答前強制取證",[305,1348,1349,1352,1355],{},[326,1350,1351],{},"System Prompt",[326,1353,1354],{},"書優先、禁止偽造角標、查不到要說",[326,1356,1357],{},"約束生成行為",[305,1359,1360,1363,1366],{},[326,1361,1362],{},"前端溯源",[326,1364,1365],{},"角標 → 預覽 → 跳轉高亮",[326,1367,1368],{},"用戶可核驗證據",[305,1370,1371,1374,1377],{},[326,1372,1373],{},"不用向量檢索",[326,1375,1376],{},"單 Provider、換 Chat 模型無需重建索引",[326,1378,1379],{},"降低整合與遷移成本",[14,1381,1382,1383,1386],{},"「零幻覺」不是指望模型從不犯錯，而是 ",[46,1384,1385],{},"用工程結構把輸出鎖在證據鏈上","：沒有檢索結果就不應冒充書中內容；有檢索結果則應給出可核驗的原文位置。",[14,1388,1389,1390,1393,1394,1397],{},"若你也在做 AI 閱讀或文件 QA，希望 ",[46,1391,1392],{},"全文直塞 → 關鍵句提取 → Tool-First 按需檢索"," 這條演進路徑，以及 ",[46,1395,1396],{},"內嵌位置標記 + 原文回傳"," 的做法，能作為可參考的一種實作。",[40,1399,1400],{},[14,1401,1402,1403,1408,1409,1413],{},"以上是我們在開發 ",[261,1404,1407],{"href":1405,"rel":1406},"https://reader.linghuxiong.com",[265],"令狐兄","（Foxycape）AI 閱讀器時的實踐心得，僅供參考。文末可前往 ",[261,1410,1412],{"href":1411},"/zh-hk#download","下載頁面"," 
體驗閱讀器。",{"title":637,"searchDepth":1415,"depth":1415,"links":1416},2,[1417,1423,1424,1425,1426,1430,1437,1441,1442,1449],{"id":59,"depth":1415,"text":60,"children":1418},[1419,1421,1422],{"id":76,"depth":1420,"text":77},3,{"id":156,"depth":1420,"text":157},{"id":253,"depth":1420,"text":254},{"id":444,"depth":1415,"text":445},{"id":505,"depth":1415,"text":506},{"id":520,"depth":1415,"text":521},{"id":651,"depth":1415,"text":652,"children":1427},[1428,1429],{"id":675,"depth":1420,"text":676},{"id":701,"depth":1420,"text":702},{"id":777,"depth":1415,"text":778,"children":1431},[1432,1434,1436],{"id":795,"depth":1420,"text":1433},"6.1 get_related_segment_summaries —— 針對具體問題查片段",{"id":865,"depth":1420,"text":1435},"6.2 get_full_book_segment_summaries —— 全書概覽類問題",{"id":886,"depth":1420,"text":887},{"id":918,"depth":1415,"text":919,"children":1438},[1439,1440],{"id":928,"depth":1420,"text":929},{"id":939,"depth":1420,"text":940},{"id":978,"depth":1415,"text":979},{"id":1054,"depth":1415,"text":1055,"children":1443},[1444,1445,1446,1447,1448],{"id":1088,"depth":1420,"text":1089},{"id":1125,"depth":1420,"text":1126},{"id":1185,"depth":1420,"text":1186},{"id":1221,"depth":1420,"text":1222},{"id":1260,"depth":1420,"text":1261},{"id":1291,"depth":1415,"text":1292},null,false,"md",{},true,"/zh-hk/blog/zero-hallucination-qa",{"description":16},"zh-hk/blog/zero-hallucination-qa","6H7EH1nM6S2Wu-58l9NXgc86P-HMs8RfuXQoz6O63C8",1780489852799]