diff --git a/.env.example b/.env.example index 715f0a7c..02367252 100644 --- a/.env.example +++ b/.env.example @@ -15,6 +15,10 @@ FC_DB_SQLITE_PATH=./db # Cache Configuration FC_REDIS_URI=redis://localhost:6379/ +# Outbound HTTP User-Agent defaults +FC_HTTP_USER_AGENT_FEED="FeedCraft/2.0" +FC_HTTP_USER_AGENT_HTML="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36" + # LLM Configuration FC_LLM_API_BASE=https://api.openai.com/v1 FC_LLM_API_KEY=sk-your-api-key-here @@ -28,4 +32,4 @@ FC_PUPPETEER_HTTP_ENDPOINT=http://localhost:3000 # Frontend Configuration FRONTEND_PORT=5173 FRONTEND_HOST=localhost -VITE_API_BASE_URL=http://localhost:8080 \ No newline at end of file +VITE_API_BASE_URL=http://localhost:8080 diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 0b0afce2..271bbdf0 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -99,7 +99,7 @@ jobs: # https://github.com/docker/build-push-action - name: Build and push Docker image id: build-and-push - uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0 + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0 with: file: build/Dockerfile context: . diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 6d180a6a..8b954416 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -26,7 +26,7 @@ jobs: with: version: latest - - uses: pnpm/action-setup@v3 + - uses: pnpm/action-setup@v6 with: version: 9 diff --git a/README.md b/README.md index 8b15665e..c8823fd9 100644 --- a/README.md +++ b/README.md @@ -19,17 +19,17 @@ Doc: [English](https://feed-craft-doc.vercel.app/en) | [简体中文](https://fe **FeedCraft** is a simple, powerful RSS feed processing tool. 
It can serve as middleware to handle your RSS feeds; you can use it to extract the main text, perform intelligent translation, generate article summaries through AI, filter articles using natural language, and more. -It includes a built-in visual RSS generator (HTML/Curl/Search to RSS) that can turn web pages, API responses, or search results into RSS feeds. +It includes a built-in visual RSS generator (HTML/JSON/Search to RSS) that can turn web pages, JSON API responses, or search results into RSS feeds. **FeedCraft** 是一个简单、强大的 RSS 源处理工具. 他可以作为一个中间件处理你的 RSS 源, 你可以用它来提取正文、智能翻译、通过 AI 生成文章摘要、通过自然语言筛选文章等. -它内置了可视化 RSS 生成器 (HTML/Curl/Search to RSS),支持将网页、API (Curl) 或搜索结果转换为 RSS 订阅源。 +它内置了可视化 RSS 生成器 (HTML/JSON/Search to RSS),支持将网页、JSON API (Curl) 或搜索结果转换为 RSS 订阅源。 ## 核心特性 - 开源可自部署. 可以作为中间件与现有的任何 RSS 阅读器协同使用 - AI Power, 可以接入 Open AI 接口兼容的 LLM 对 RSS 进行处理, 可自定义 prompt -- **HTML/Curl/Search to RSS**: 内置可视化 RSS 生成器,支持将网页、API (Curl) 或搜索结果转换为 RSS 订阅源 +- **HTML/JSON/Search to RSS**: 内置可视化 RSS 生成器,支持将网页、JSON API (支持 Curl 语句导入) 或搜索结果转换为 RSS 订阅源 - 支持**便携模式**(portable mode, 即用即走, 只需要在原 RSS 地址前面加个前缀即可), 和**高级模式**(dock mode, 在后台页面自定义 RSS 地址和各类深度加工参数) @@ -87,7 +87,6 @@ https://feed-craft.colinx.one 控制台默认账号`admin`, 密码 `adminadmin`, 登陆后请尽快修改默认密码 ```yaml -version: "3" services: app.feed-craft: image: ghcr.io/colin-xkl/feed-craft @@ -129,6 +128,7 @@ services: FC_LLM_API_MODEL: gemini-pro/chatgpt-3.5/... 
# 默认使用的模型 FC_LLM_API_TYPE: openai # openai 或 ollama FC_DEFAULT_TARGET_LANG: zh-CN # (Optional) LLM 处理任务的默认目标语言 + LOG_LEVEL: info # (Optional) 日志级别,如 info, debug, trace service.redis: image: redis:6-alpine container_name: feedcraft_redis diff --git a/doc-site/src/content/docs/en/guides/advanced/customization.md b/doc-site/src/content/docs/en/guides/advanced/customization.md index 625e87f0..4fae8fa2 100644 --- a/doc-site/src/content/docs/en/guides/advanced/customization.md +++ b/doc-site/src/content/docs/en/guides/advanced/customization.md @@ -65,12 +65,15 @@ You can configure FeedCraft using environment variables in `docker-compose.yml`. - **FC_PUPPETEER_HTTP_ENDPOINT**: Address of the Browserless/Chrome instance. Required for `fulltext-plus`. - **FC_REDIS_URI**: Redis connection address. Used for caching to speed up processing and reduce AI token consumption. +- **FC_HTTP_USER_AGENT_FEED**: (Optional) Default `User-Agent` for feed-style outbound requests, such as fetching RSS/XML resources. Search provider requests are temporarily grouped into this same rule. +- **FC_HTTP_USER_AGENT_HTML**: (Optional) Default `User-Agent` for HTML page fetches, such as fulltext extraction and the HTML-to-RSS tooling. **Note:** If the value contains spaces or parentheses, it must be enclosed in quotes. - **FC_LLM_API_KEY**: API Key for OpenAI or compatible services (like DeepSeek, Gemini, etc.). - **FC_LLM_API_MODEL**: Default model to use (e.g., `gemini-pro`, `gpt-3.5-turbo`). **Multiple Models Support:** You can provide a comma-separated list of models (e.g., `gpt-3.5-turbo,gpt-4`). FeedCraft will randomly select a model for each request and automatically retry with others if a call fails. - **FC_LLM_API_BASE**: API endpoint address. For OpenAI-compatible APIs, usually ends with `/v1`. - **FC_LLM_API_TYPE**: (Optional) `openai` (default) or `ollama`. - **FC_LLM_MAX_CONCURRENCY**: (Optional) Global maximum concurrency for LLM requests (default: `3`). 
Limits concurrent API calls to prevent rate limits. - **FC_DOMAIN_MAX_CONCURRENCY**: (Optional) Maximum concurrent requests per target domain during web scraping like fulltext extraction (default: `3`). Prevents overwhelming target servers. +- **LOG_LEVEL**: (Optional) Log level for the backend application (e.g., `info`, `debug`, `trace`). Overrides the default level set by `ENV`. ### External Services diff --git a/doc-site/src/content/docs/en/guides/advanced/html-to-rss.md b/doc-site/src/content/docs/en/guides/advanced/html-to-rss.md index 8c7370cb..f9a21b44 100644 --- a/doc-site/src/content/docs/en/guides/advanced/html-to-rss.md +++ b/doc-site/src/content/docs/en/guides/advanced/html-to-rss.md @@ -11,7 +11,7 @@ sidebar: FeedCraft includes a visual **HTML to RSS** tool that allows you to generate selectors for creating RSS feeds from websites that don't provide them natively. :::note -This tool is designed for HTML pages. If you need to process a JSON API, use the [CURL to RSS](/en/guides/advanced/curl-to-rss/) instead. +This tool is designed for HTML pages. If you need to process a JSON API, use the [JSON to RSS](/en/guides/advanced/json-to-rss/) instead. ::: ## Overview diff --git a/doc-site/src/content/docs/en/guides/advanced/curl-to-rss.md b/doc-site/src/content/docs/en/guides/advanced/json-to-rss.md similarity index 73% rename from doc-site/src/content/docs/en/guides/advanced/curl-to-rss.md rename to doc-site/src/content/docs/en/guides/advanced/json-to-rss.md index d1f06b2c..0fbd6e9e 100644 --- a/doc-site/src/content/docs/en/guides/advanced/curl-to-rss.md +++ b/doc-site/src/content/docs/en/guides/advanced/json-to-rss.md @@ -1,5 +1,5 @@ --- -title: CURL to RSS +title: JSON to RSS description: Convert any JSON API response into an RSS feed with jq selectors and optional templates. 
sidebar: order: 3 @@ -8,11 +8,11 @@ sidebar: variant: success --- -FeedCraft includes a **CURL to RSS** tool that allows you to fetch data from JSON APIs, extract fields with `jq`, and optionally post-process them with templates before generating an RSS feed. +FeedCraft includes a **JSON to RSS** tool that allows you to fetch data from JSON APIs, extract fields with `jq`, and optionally post-process them with templates before generating an RSS feed. ## Overview -The CURL to RSS tool helps you: +The JSON to RSS tool helps you: 1. **Fetch** JSON data from an API endpoint (supporting custom headers and methods). 2. **Parse** the JSON structure using `jq` syntax, then optionally use templates to build the final RSS fields. @@ -21,13 +21,13 @@ The CURL to RSS tool helps you: ## How to use -Navigate to **Worktable > Curl to RSS** in the admin dashboard. +Navigate to **Worktable > JSON to RSS** in the admin dashboard. ### Step 1: Request Configuration You need to define how to fetch the JSON data. -- **Import from Curl**: You can paste a `curl` command to automatically populate the URL, method, headers, and body. This is useful if you copy the request from your browser's Developer Tools. +- **Import from cURL**: You can paste a `curl` command to automatically populate the URL, method, headers, and body. This is useful if you copy the request from your browser's Developer Tools. - **Method**: Select `GET` or `POST`. - **URL**: The API endpoint URL. - **Headers**: Add any necessary headers (e.g., `Authorization`, `Content-Type`). @@ -50,6 +50,28 @@ The tool uses **[jq](https://jqlang.github.io/jq/)** syntax for querying JSON, a - **Date Selector**: (Optional) Path to the publication date. - **Content Selector**: (Optional) Path to the full content or summary. +#### Using Templates (Optional) + +You can use [Go Templates](https://pkg.go.dev/text/template) to further process extracted values. 
+ +**Available Variables:** + +- `.Fields`: The parsed field values (e.g., `.Fields.Title`, `.Fields.Link`, `.Fields.Date`, `.Fields.Description`). +- `.Item`: The raw JSON item object (e.g., `.Item.id`, `.Item.author.name`). + +**Built-in Functions:** + +- `trimSpace`: Removes leading and trailing whitespace. +- `trim`: Removes specified leading and trailing characters. +- `default`: Provides a fallback value if the field is empty. + +**Examples:** + +- **Clean up whitespace in title**: `{{ .Fields.Title | trimSpace }}` +- **Build absolute URLs**: `https://example.com/article/{{ .Item.id }}` +- **Remove specific prefixes**: `{{ .Fields.Description | trim "Prefix: " }}` +- **Fallback values**: `{{ default .Fields.Description "No summary available" }}` + Click **Run Preview** to verify your selectors, then click **Next Step**. ### Step 3: Feed Metadata diff --git a/doc-site/src/content/docs/en/guides/advanced/tools.md b/doc-site/src/content/docs/en/guides/advanced/tools.md index 9f3c114e..037b9392 100644 --- a/doc-site/src/content/docs/en/guides/advanced/tools.md +++ b/doc-site/src/content/docs/en/guides/advanced/tools.md @@ -48,6 +48,21 @@ The **Craft Dependencies** (System Health) tool visualizes the internal relation If you encounter errors like "Craft not found", use this tool to trace the broken link in your configuration. ::: +## System Runtime + +The **System Runtime** (Observability) tool provides a comprehensive dashboard for monitoring the health and execution status of your resources. + +- **Usage**: + 1. Navigate to **Tools > System Runtime**. +- **Features**: + - **Resource Health**: View the current status (Healthy, Degraded, Paused) of Recipes and other components, including consecutive failures. + - **Execution Logs**: Track detailed execution history, success rates, and specific error types (e.g., Timeout, Network, Parse) across all runs. 
+ - **System Notifications**: Review automated alerts regarding resource state transitions (e.g., when a Recipe becomes degraded). You can also subscribe to these alerts via the built-in RSS feed at `/system/notifications/rss`. + +:::tip +If a Recipe fails repeatedly and becomes "Paused", you can use the System Runtime dashboard to manually "Resume" it after fixing the underlying issue. +::: + ## Debug Tools ### LLM Debug diff --git a/doc-site/src/content/docs/en/guides/start/concepts.md b/doc-site/src/content/docs/en/guides/start/concepts.md index ec205634..b49e9e91 100644 --- a/doc-site/src/content/docs/en/guides/start/concepts.md +++ b/doc-site/src/content/docs/en/guides/start/concepts.md @@ -57,6 +57,11 @@ You can configure processors for a Topic Feed to automatically handle the combin - **Limit**: Keeps only the most recent items. **Managing Topic Feeds:** + +:::caution +The Topic Feed feature is currently under active development. Its UI entry point in the admin dashboard has been temporarily hidden until the feature is stable and ready for general use. +::: + Navigate to **Worktable > Topic Feed** to create and manage topics. - **Create**: Define a title, add multiple input URIs (e.g. `feedcraft://recipe/my-recipe` or external RSS URLs), and set your aggregator config. 
diff --git a/doc-site/src/content/docs/zh-tw/guides/advanced/customization.md b/doc-site/src/content/docs/zh-tw/guides/advanced/customization.md index 2d656a43..a82ff01c 100644 --- a/doc-site/src/content/docs/zh-tw/guides/advanced/customization.md +++ b/doc-site/src/content/docs/zh-tw/guides/advanced/customization.md @@ -65,12 +65,15 @@ sidebar: - **FC_PUPPETEER_HTTP_ENDPOINT**: Browserless/Chrome 實例的地址。`fulltext-plus` 功能必須。 - **FC_REDIS_URI**: Redis 連線地址。用於快取,加快處理速度並減少 AI Token 消耗。 +- **FC_HTTP_USER_AGENT_FEED**: (可選)feed 類外部請求的預設 `User-Agent`,例如抓取 RSS/XML 資源時使用。搜尋提供方請求目前也暫時歸入這一規則。 +- **FC_HTTP_USER_AGENT_HTML**: (可選)HTML 頁面抓取的預設 `User-Agent`,例如全文提取和 HTML 轉 RSS 工具使用。**注意:** 如果該值包含空格或括號,必須使用引號括起來。 - **FC_LLM_API_KEY**: OpenAI 或相容服務(如 DeepSeek, Gemini 等)的 API Key。 - **FC_LLM_API_MODEL**: 預設使用的模型(如 `gemini-pro`, `gpt-3.5-turbo`)。**支援多個模型:** 你可以提供一個逗號分隔的模型列表(例如 `gpt-3.5-turbo,gpt-4`)。FeedCraft 會為每個請求隨機選擇一個模型,如果調用失敗,會自動重試列表中的其他模型。 - **FC_LLM_API_BASE**: API 介面地址。如果是相容 OpenAI 的 API,通常以 `/v1` 結尾。 - **FC_LLM_API_TYPE**: (可選) `openai` (預設) 或 `ollama`. 
- **FC_LLM_MAX_CONCURRENCY**: (可選) 全局最大 LLM 併發請求數(預設: `3`)。用於限制併發請求數量以防止觸發 API 速率限制。 - **FC_DOMAIN_MAX_CONCURRENCY**: (可選) 網頁抓取(如全文提取)時每個目標域名的最大併發數(預設: `3`)。防止抓取目標伺服器負載過高。 +- **LOG_LEVEL**: (可選) 後端應用的日誌級別 (例如 `info`, `debug`, `trace`)。覆蓋 `ENV` 設定的預設級別。 ### 外部服務 diff --git a/doc-site/src/content/docs/zh-tw/guides/advanced/html-to-rss.md b/doc-site/src/content/docs/zh-tw/guides/advanced/html-to-rss.md index c4dde513..b2d0b751 100644 --- a/doc-site/src/content/docs/zh-tw/guides/advanced/html-to-rss.md +++ b/doc-site/src/content/docs/zh-tw/guides/advanced/html-to-rss.md @@ -11,7 +11,7 @@ sidebar: FeedCraft 內建了視覺化的 **從 HTML 網頁生成 RSS (HTML to RSS)** 工具,允許你生成選取器,以便為那些沒有提供 RSS 的網站建立訂閱源。 :::note -此工具專為 HTML 頁面設計。如果你需要處理 JSON API,請使用 [從 CURL 語句生成 RSS](/zh-tw/guides/advanced/curl-to-rss/)。 +此工具專為 HTML 頁面設計。如果你需要處理 JSON API,請使用 [從 JSON 生成 RSS](/zh-tw/guides/advanced/json-to-rss/)。 ::: ## 概覽 diff --git a/doc-site/src/content/docs/zh-tw/guides/advanced/curl-to-rss.md b/doc-site/src/content/docs/zh-tw/guides/advanced/json-to-rss.md similarity index 74% rename from doc-site/src/content/docs/zh-tw/guides/advanced/curl-to-rss.md rename to doc-site/src/content/docs/zh-tw/guides/advanced/json-to-rss.md index 1ca8302d..8b70fae7 100644 --- a/doc-site/src/content/docs/zh-tw/guides/advanced/curl-to-rss.md +++ b/doc-site/src/content/docs/zh-tw/guides/advanced/json-to-rss.md @@ -1,5 +1,5 @@ --- -title: 從 CURL 語句生成 RSS +title: 從 JSON 生成 RSS description: 使用 jq 提取欄位,並透過可選模板將任意 JSON API 響應轉換為 RSS 訂閱源。 sidebar: order: 3 @@ -8,7 +8,7 @@ sidebar: variant: success --- -FeedCraft 包含一個 **從 CURL 語句生成 RSS (CURL to RSS)** 工具,允許你從 JSON API 獲取資料,先用 `jq` 提取欄位,再透過可選模板將其轉換為 RSS 訂閱源。 +FeedCraft 包含一個 **從 JSON 生成 RSS (JSON to RSS)** 工具,允許你從 JSON API 獲取資料,先用 `jq` 提取欄位,再透過可選模板將其轉換為 RSS 訂閱源。 ## 概覽 @@ -21,13 +21,13 @@ JSON RSS 生成器可以幫助你: ## 如何使用 -在管理後台導航至 **工作台 > Curl 轉 RSS**。 +在管理後台導航至 **工作台 > JSON 轉 RSS**。 ### 第一步:請求配置 (Request Configuration) 你需要定義如何獲取 JSON 資料。 -- **從 Curl 匯入 (Import from Curl)**:你可以貼上 `curl` 
命令來自動填充 URL、方法、請求標頭和請求體。這在你從瀏覽器開發者工具複製請求時非常有用。 +- **從 cURL 匯入 (Import from cURL)**:你可以貼上 `curl` 命令來自動填充 URL、方法、請求標頭和請求體。這在你從瀏覽器開發者工具複製請求時非常有用。 - **方法 (Method)**:選擇 `GET` 或 `POST`。 - **URL**:API 端點 URL。 - **Headers**:添加任何必要的請求標頭(例如 `Authorization`, `Content-Type`)。 @@ -50,6 +50,28 @@ JSON RSS 生成器可以幫助你: - **日期選取器 (Date Selector)**:(可選)發布日期的路徑。 - **內容選取器 (Content Selector)**:(可選)完整內容或摘要的路徑。 +#### 使用模板 (可選) + +你可以使用 [Go Templates](https://pkg.go.dev/text/template) 語法對提取的值進行進一步處理。 + +**可用變數:** + +- `.Fields`:已解析的欄位值(例如 `.Fields.Title`, `.Fields.Link`, `.Fields.Date`, `.Fields.Description`)。 +- `.Item`:原始 JSON 列表項物件(例如 `.Item.id`, `.Item.author.name`)。 + +**內建函數:** + +- `trimSpace`:移除首尾的空白字元。 +- `trim`:移除首尾指定的字元。 +- `default`:如果欄位為空,提供一個預設值。 + +**範例:** + +- **清理標題空白字元**:`{{ .Fields.Title | trimSpace }}` +- **拼接完整連結**:`https://example.com/article/{{ .Item.id }}` +- **移除特定前綴**:`{{ .Fields.Description | trim "Prefix: " }}` +- **預設值兜底**:`{{ default .Fields.Description "暫無摘要" }}` + 點擊 **執行預覽 (Run Preview)** 驗證你的選取器,然後點擊 **下一步 (Next Step)**。 ### 第三步:訂閱源元數據 (Feed Metadata) diff --git a/doc-site/src/content/docs/zh-tw/guides/advanced/tools.md b/doc-site/src/content/docs/zh-tw/guides/advanced/tools.md index 7a7dcf03..ca4cdead 100644 --- a/doc-site/src/content/docs/zh-tw/guides/advanced/tools.md +++ b/doc-site/src/content/docs/zh-tw/guides/advanced/tools.md @@ -48,6 +48,21 @@ FeedCraft 提供了一些內建工具來幫助您除錯 RSS 來源並監控系 如果遇到 "Craft not found" 等錯誤,可以使用此工具追蹤配置中的斷鏈。 ::: +## 系統運行狀態 (System Runtime) + +**系統運行狀態** (Observability) 工具提供了一個全面的儀表板,用於監控資源的健康狀況和執行狀態。 + +- **使用方法**: + 1. 
導航至 **工具 > 系統運行狀態**。 +- **功能**: + - **資源健康 (Resource Health)**: 查看配方及其他組件的當前狀態(健康、降級、暫停),包括連續失敗次數。 + - **執行日誌 (Execution Logs)**: 追蹤詳細的執行歷史、成功率以及每次運行的具體錯誤類型(例如:超時、網路錯誤、解析錯誤)。 + - **系統通知 (System Notifications)**: 查看關於資源狀態轉換的自動警報(例如當配方降級時)。您還可以透過內建的 RSS 來源 `/system/notifications/rss` 訂閱這些警報。 + +:::tip +如果配方反覆失敗並變為「暫停 (Paused)」狀態,您可以在解決根本問題後,透過系統運行狀態儀表板手動將其「恢復 (Resume)」。 +::: + ## 除錯工具 (Debug Tools) ### LLM 除錯 (LLM Debug) diff --git a/doc-site/src/content/docs/zh-tw/guides/start/concepts.md b/doc-site/src/content/docs/zh-tw/guides/start/concepts.md index 4a3daf3f..74688ba4 100644 --- a/doc-site/src/content/docs/zh-tw/guides/start/concepts.md +++ b/doc-site/src/content/docs/zh-tw/guides/start/concepts.md @@ -57,6 +57,11 @@ sidebar: - **限制 (Limit)**:僅保留最新發布的指定數量項目。 **管理主題訂閱:** + +:::caution +Topic Feed (主題訂閱) 功能當前仍在開發完善中。其在管理後台的入口已被臨時隱藏,待功能穩定就緒後將重新開放。 +::: + 在管理後台導航至 **工作台 (Worktable) > 主題訂閱 (Topic Feed)** 頁面建立和管理主題。 - **建立**:定義標題,添加多個輸入 URI(例如 `feedcraft://recipe/my-recipe` 或外部 RSS 連結),並配置你的聚合規則。 diff --git a/doc-site/src/content/docs/zh/guides/advanced/customization.md b/doc-site/src/content/docs/zh/guides/advanced/customization.md index f86aad0c..da7b950d 100644 --- a/doc-site/src/content/docs/zh/guides/advanced/customization.md +++ b/doc-site/src/content/docs/zh/guides/advanced/customization.md @@ -65,12 +65,15 @@ sidebar: - **FC_PUPPETEER_HTTP_ENDPOINT**: Browserless/Chrome 实例的地址。`fulltext-plus` 功能必须。 - **FC_REDIS_URI**: Redis 连接地址。用于缓存,加快处理速度并减少 AI Token 消耗。 +- **FC_HTTP_USER_AGENT_FEED**: (可选)feed 类外部请求的默认 `User-Agent`,例如抓取 RSS/XML 资源时使用。搜索提供方请求目前也临时归入这一规则。 +- **FC_HTTP_USER_AGENT_HTML**: (可选)HTML 页面抓取的默认 `User-Agent`,例如全文提取和 HTML 转 RSS 工具使用。**注意:** 如果该值包含空格或括号,必须使用引号括起来。 - **FC_LLM_API_KEY**: OpenAI 或兼容服务(如 DeepSeek, Gemini 等)的 API Key。 - **FC_LLM_API_MODEL**: 默认使用的模型(如 `gemini-pro`, `gpt-3.5-turbo`)。**支持多个模型:** 你可以提供一个逗号分隔的模型列表(例如 `gpt-3.5-turbo,gpt-4`)。FeedCraft 会为每个请求随机选择一个模型,如果调用失败,会自动重试列表中的其他模型。 - **FC_LLM_API_BASE**: API 接口地址。如果是兼容 OpenAI 的 API,通常以 `/v1` 
结尾。 - **FC_LLM_API_TYPE**: (可选) `openai` (默认) 或 `ollama`. - **FC_LLM_MAX_CONCURRENCY**: (可选) 全局最大 LLM 并发请求数(默认: `3`)。用于限制并发请求数量以防止触发 API 速率限制。 - **FC_DOMAIN_MAX_CONCURRENCY**: (可选) 网页抓取(如全文提取)时每个目标域名的最大并发数(默认: `3`)。防止抓取目标服务器负载过高。 +- **LOG_LEVEL**: (可选) 后端应用的日志级别 (例如 `info`, `debug`, `trace`)。覆盖 `ENV` 设置的默认级别。 ### 外部服务 diff --git a/doc-site/src/content/docs/zh/guides/advanced/html-to-rss.md b/doc-site/src/content/docs/zh/guides/advanced/html-to-rss.md index 1b70ad82..33a5fe7a 100644 --- a/doc-site/src/content/docs/zh/guides/advanced/html-to-rss.md +++ b/doc-site/src/content/docs/zh/guides/advanced/html-to-rss.md @@ -11,7 +11,7 @@ sidebar: FeedCraft 内置了可视化的 **从 HTML 网页生成 RSS (HTML to RSS)** 工具,允许你生成选择器,以便为那些没有提供 RSS 的网站创建订阅源。 :::note -此工具专为 HTML 页面设计。如果你需要处理 JSON API,请使用 [从 CURL 语句生成 RSS](/zh/guides/advanced/curl-to-rss/)。 +此工具专为 HTML 页面设计。如果你需要处理 JSON API,请使用 [从 JSON 生成 RSS](/zh/guides/advanced/json-to-rss/)。 ::: ## 概览 diff --git a/doc-site/src/content/docs/zh/guides/advanced/curl-to-rss.md b/doc-site/src/content/docs/zh/guides/advanced/json-to-rss.md similarity index 74% rename from doc-site/src/content/docs/zh/guides/advanced/curl-to-rss.md rename to doc-site/src/content/docs/zh/guides/advanced/json-to-rss.md index 3e050389..5cfb8428 100644 --- a/doc-site/src/content/docs/zh/guides/advanced/curl-to-rss.md +++ b/doc-site/src/content/docs/zh/guides/advanced/json-to-rss.md @@ -1,5 +1,5 @@ --- -title: 从CURL语句生成RSS +title: 从 JSON 生成 RSS description: 使用 jq 提取字段,并通过可选模板将任意 JSON API 响应转换为 RSS 订阅源。 sidebar: order: 3 @@ -8,7 +8,7 @@ sidebar: variant: success --- -FeedCraft 包含一个 **从 CURL 语句生成 RSS (CURL to RSS)** 工具,允许你从 JSON API 获取数据,先用 `jq` 提取字段,再通过可选模板将其转换为 RSS 订阅源。 +FeedCraft 包含一个 **从 JSON 生成 RSS (JSON to RSS)** 工具,允许你从 JSON API 获取数据,先用 `jq` 提取字段,再通过可选模板将其转换为 RSS 订阅源。 ## 概览 @@ -21,13 +21,13 @@ JSON RSS 生成器可以帮助你: ## 如何使用 -在管理后台导航至 **工作台 > Curl 转 RSS**。 +在管理后台导航至 **工作台 > JSON 转 RSS**。 ### 第一步:请求配置 (Request Configuration) 你需要定义如何获取 JSON 数据。 -- **从 Curl 导入 (Import from 
Curl)**:你可以粘贴 `curl` 命令来自动填充 URL、方法、请求头和请求体。这在你从浏览器开发者工具复制请求时非常有用。 +- **从 cURL 导入 (Import from cURL)**:你可以粘贴 `curl` 命令来自动填充 URL、方法、请求头和请求体。这在你从浏览器开发者工具复制请求时非常有用。 - **方法 (Method)**:选择 `GET` 或 `POST`。 - **URL**:API 端点 URL。 - **Headers**:添加任何必要的请求头(例如 `Authorization`, `Content-Type`)。 @@ -50,6 +50,28 @@ JSON RSS 生成器可以帮助你: - **日期选择器 (Date Selector)**:(可选)发布日期的路径。 - **内容选择器 (Content Selector)**:(可选)完整内容或摘要的路径。 +#### 使用模板 (可选) + +你可以使用 [Go Templates](https://pkg.go.dev/text/template) 语法对提取的值进行进一步处理。 + +**可用变量:** + +- `.Fields`:已解析的字段值(例如 `.Fields.Title`, `.Fields.Link`, `.Fields.Date`, `.Fields.Description`)。 +- `.Item`:原始 JSON 列表项对象(例如 `.Item.id`, `.Item.author.name`)。 + +**内置函数:** + +- `trimSpace`:移除首尾的空白字符。 +- `trim`:移除首尾指定的字符。 +- `default`:如果字段为空,提供一个默认值。 + +**示例:** + +- **清理标题空白字符**:`{{ .Fields.Title | trimSpace }}` +- **拼接完整链接**:`https://example.com/article/{{ .Item.id }}` +- **移除特定前缀**:`{{ .Fields.Description | trim "Prefix: " }}` +- **默认值兜底**:`{{ default .Fields.Description "暂无摘要" }}` + 点击 **运行预览 (Run Preview)** 验证你的选择器,然后点击 **下一步 (Next Step)**。 ### 第三步:订阅源元数据 (Feed Metadata) diff --git a/doc-site/src/content/docs/zh/guides/advanced/tools.md b/doc-site/src/content/docs/zh/guides/advanced/tools.md index b09e5291..fb07d63c 100644 --- a/doc-site/src/content/docs/zh/guides/advanced/tools.md +++ b/doc-site/src/content/docs/zh/guides/advanced/tools.md @@ -48,6 +48,21 @@ FeedCraft 提供了一些内置工具来帮助您调试 RSS 源并监控系统 如果遇到 "Craft not found" 等错误,可以使用此工具追踪配置中的断链。 ::: +## 系统运行状态 (System Runtime) + +**系统运行状态** (Observability) 工具提供了一个全面的仪表板,用于监控资源的健康状况和执行状态。 + +- **使用方法**: + 1. 
导航至 **工具 > 系统运行状态**。 +- **功能**: + - **资源健康 (Resource Health)**: 查看配方及其他组件的当前状态(健康、降级、暂停),包括连续失败次数。 + - **执行日志 (Execution Logs)**: 跟踪详细的执行历史、成功率以及每次运行的具体错误类型(例如:超时、网络错误、解析错误)。 + - **系统通知 (System Notifications)**: 查看关于资源状态转换的自动警报(例如当配方降级时)。您还可以通过内置的 RSS 源 `/system/notifications/rss` 订阅这些警报。 + +:::tip +如果配方反复失败并变为“暂停 (Paused)”状态,您可以在解决根本问题后,通过系统运行状态仪表板手动将其“恢复 (Resume)”。 +::: + ## 调试工具 (Debug Tools) ### LLM 调试 (LLM Debug) diff --git a/doc-site/src/content/docs/zh/guides/start/concepts.md b/doc-site/src/content/docs/zh/guides/start/concepts.md index d35ed1e1..d4847ef1 100644 --- a/doc-site/src/content/docs/zh/guides/start/concepts.md +++ b/doc-site/src/content/docs/zh/guides/start/concepts.md @@ -57,6 +57,11 @@ sidebar: - **限制 (Limit)**:仅保留最新发布的指定数量项目。 **管理主题订阅:** + +:::caution +Topic Feed (主题订阅) 功能当前仍在开发完善中。其在管理后台的入口已被临时隐藏,待功能稳定就绪后将重新开放。 +::: + 在管理后台导航至 **工作台 (Worktable) > 主题订阅 (Topic Feed)** 页面创建和管理主题。 - **创建**:定义标题,添加多个输入 URI(例如 `feedcraft://recipe/my-recipe` 或外部 RSS 链接),并配置你的聚合规则。 diff --git a/internal/adapter/llm.go b/internal/adapter/llm.go index c43c4bb2..f0c4a77a 100644 --- a/internal/adapter/llm.go +++ b/internal/adapter/llm.go @@ -13,7 +13,7 @@ func CallLLMUsingContext(prompt, context string, option util.ContentProcessOptio processedContext = strings.ReplaceAll(processedContext, "`", "") finalPrompt := fmt.Sprintf("%s \n```\n%s\n```", prompt, processedContext) - cacheKey := fmt.Sprintf("llm_call_%s", util.GetMD5Hash(finalPrompt)) + cacheKey := fmt.Sprintf("llm_call_%s", util.GetTextContentHash(finalPrompt)) valFunc := func() (string, error) { return SimpleLLMCall(UseDefaultModel, finalPrompt) } diff --git a/internal/config/source_config.go b/internal/config/source_config.go index 8c249fe1..6bc33056 100644 --- a/internal/config/source_config.go +++ b/internal/config/source_config.go @@ -4,6 +4,11 @@ import ( "FeedCraft/internal/constant" ) +const ( + HttpFetcherPurposeFeed = "feed" + HttpFetcherPurposeHTML = "html" +) + // --- Fetcher-specific Configurations 
--- // HttpFetcherConfig holds the configuration for an HTTP fetcher. @@ -14,6 +19,7 @@ type HttpFetcherConfig struct { Headers map[string]string `json:"headers,omitempty"` Body string `json:"body,omitempty"` UseBrowserless bool `json:"use_browserless,omitempty"` + Purpose string `json:"purpose,omitempty"` } // SearchFetcherConfig holds the configuration for search-based fetching. diff --git a/internal/controller/craft_flow.go b/internal/controller/craft_flow.go index 91ba867a..5ddc4eef 100644 --- a/internal/controller/craft_flow.go +++ b/internal/controller/craft_flow.go @@ -112,8 +112,13 @@ func DeleteCraftFlow(c *gin.Context) { return } - if err := db.Delete(craftFlow).Error; err != nil { - c.JSON(http.StatusBadRequest, util.APIResponse[any]{Msg: err.Error()}) + result := db.Delete(craftFlow) + if result.Error != nil { + c.JSON(http.StatusBadRequest, util.APIResponse[any]{Msg: result.Error.Error()}) + return + } + if result.RowsAffected == 0 { + c.JSON(http.StatusNotFound, util.APIResponse[any]{Msg: "CraftFlow not found"}) return } diff --git a/internal/controller/curl_to_rss.go b/internal/controller/curl_to_rss.go index 28c214ed..ed3d84a6 100644 --- a/internal/controller/curl_to_rss.go +++ b/internal/controller/curl_to_rss.go @@ -175,7 +175,9 @@ func CurlParse(c *gin.Context) { } var input interface{} - if err := json.Unmarshal([]byte(req.JsonContent), &input); err != nil { + decoder := json.NewDecoder(strings.NewReader(req.JsonContent)) + decoder.UseNumber() + if err := decoder.Decode(&input); err != nil { c.JSON(http.StatusBadRequest, util.APIResponse[any]{StatusCode: -1, Msg: "Invalid JSON content: " + err.Error()}) return } diff --git a/internal/controller/custom_recipe.go b/internal/controller/custom_recipe.go index ffba1e9d..6b6872f4 100644 --- a/internal/controller/custom_recipe.go +++ b/internal/controller/custom_recipe.go @@ -111,6 +111,10 @@ func DeleteCustomRecipe(c *gin.Context) { db := util.GetDatabase() if err := dao.DeleteCustomRecipeV2(db, id); 
err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + c.JSON(http.StatusNotFound, util.APIResponse[any]{Msg: "Recipe not found"}) + return + } c.JSON(http.StatusInternalServerError, util.APIResponse[any]{Msg: err.Error()}) return } diff --git a/internal/controller/feed_viewer.go b/internal/controller/feed_viewer.go new file mode 100644 index 00000000..9b131a6e --- /dev/null +++ b/internal/controller/feed_viewer.go @@ -0,0 +1,254 @@ +package controller + +import ( + "FeedCraft/internal/config" + "FeedCraft/internal/constant" + "FeedCraft/internal/craft" + "FeedCraft/internal/model" + "FeedCraft/internal/source" + "FeedCraft/internal/util" + "errors" + "fmt" + "net" + "net/http" + "net/url" + "strings" + "time" + + "github.com/gin-gonic/gin" + "github.com/mmcdole/gofeed" +) + +type FeedViewerPreviewReq struct { + InputURL string `json:"input_url" form:"input_url" binding:"required"` + CraftName string `json:"craft_name" form:"craft_name"` +} + +type FeedViewerPreview struct { + Title string `json:"title"` + Description string `json:"description"` + Link string `json:"link"` + FeedURL string `json:"feedUrl"` + Copyright string `json:"copyright"` + Image *FeedViewerPreviewImage `json:"image,omitempty"` + Items []FeedViewerPreviewItem `json:"items"` +} + +type FeedViewerPreviewImage struct { + URL string `json:"url"` + Title string `json:"title"` +} + +type FeedViewerPreviewItem struct { + GUID string `json:"guid"` + Title string `json:"title"` + Link string `json:"link"` + PubDate string `json:"pubDate"` + IsoDate string `json:"isoDate"` + Content string `json:"content"` + ContentSnippet string `json:"contentSnippet"` +} + +func PreviewFeedViewer(c *gin.Context) { + var req FeedViewerPreviewReq + if err := c.ShouldBindQuery(&req); err != nil { + c.JSON(http.StatusBadRequest, util.APIResponse[any]{StatusCode: -1, Msg: "Please enter a valid http(s) feed URL"}) + return + } + + if err := validateFeedViewerURL(req.InputURL); err != nil { + 
c.JSON(http.StatusBadRequest, util.APIResponse[any]{StatusCode: -1, Msg: err.Error()}) + return + } + + feed, err := loadFeedViewerPreview(c, req) + if err != nil { + status, msg := classifyFeedViewerError(err) + c.JSON(status, util.APIResponse[any]{StatusCode: -1, Msg: msg}) + return + } + + c.JSON(http.StatusOK, util.APIResponse[FeedViewerPreview]{ + StatusCode: 0, + Data: buildFeedViewerPreview(feed, req.InputURL), + }) +} + +func loadFeedViewerPreview(c *gin.Context, req FeedViewerPreviewReq) (*model.CraftFeed, error) { + cfg := &config.SourceConfig{ + Type: constant.SourceRSS, + HttpFetcher: &config.HttpFetcherConfig{ + URL: req.InputURL, + }, + } + + factory, err := source.Get(constant.SourceRSS) + if err != nil { + return nil, fmt.Errorf("factory not found: %w", err) + } + + src, err := factory(cfg) + if err != nil { + return nil, fmt.Errorf("failed to create source: %w", err) + } + + feed, err := src.Fetch(c.Request.Context()) + if err != nil { + return nil, err + } + + if req.CraftName == "" || req.CraftName == "proxy" { + return feed, nil + } + + craftedFeed, err := buildCraftPreview(feed, req.InputURL, req.CraftName) + if err != nil { + return nil, err + } + + return craftedFeed, nil +} + +func buildCraftPreview(feed *model.CraftFeed, inputURL, craftName string) (*model.CraftFeed, error) { + atomXML, err := feed.ToFeedsFeed().ToAtom() + if err != nil { + return nil, err + } + + parsedFeed, err := gofeed.NewParser().ParseString(atomXML) + if err != nil { + return nil, err + } + + craftedFeed, err := craft.ProcessFeed(parsedFeed, inputURL, craftName) + if err != nil { + return nil, err + } + + return model.FromFeedsFeed(craftedFeed), nil +} + +func buildFeedViewerPreview(feed *model.CraftFeed, inputURL string) FeedViewerPreview { + preview := FeedViewerPreview{ + Title: feed.Title, + Description: feed.Description, + Link: feed.Link, + FeedURL: inputURL, + Copyright: feed.Copyright, + Items: make([]FeedViewerPreviewItem, 0, len(feed.Articles)), + } + + if 
feed.ImageURL != "" || feed.ImageTitle != "" { + preview.Image = &FeedViewerPreviewImage{ + URL: feed.ImageURL, + Title: feed.ImageTitle, + } + } + + for _, article := range feed.Articles { + if article == nil { + continue + } + + contentSnippet := article.Description + if contentSnippet == "" { + contentSnippet = article.Content + } + + preview.Items = append(preview.Items, FeedViewerPreviewItem{ + GUID: article.Id, + Title: article.Title, + Link: article.Link, + PubDate: formatFeedViewerTime(article.Created, article.Updated), + IsoDate: formatFeedViewerISOTime(article.Created, article.Updated), + Content: article.Content, + ContentSnippet: contentSnippet, + }) + } + + return preview +} + +func formatFeedViewerTime(primary, fallback time.Time) string { + if !primary.IsZero() { + return primary.Format(time.RFC1123Z) + } + if !fallback.IsZero() { + return fallback.Format(time.RFC1123Z) + } + return "" +} + +func formatFeedViewerISOTime(primary, fallback time.Time) string { + if !primary.IsZero() { + return primary.UTC().Format(time.RFC3339) + } + if !fallback.IsZero() { + return fallback.UTC().Format(time.RFC3339) + } + return "" +} + +func validateFeedViewerURL(rawURL string) error { + parsedURL, err := url.Parse(rawURL) + if err != nil || parsedURL == nil { + return errors.New("Please enter a valid http(s) feed URL") + } + if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { + return errors.New("Please enter a valid http(s) feed URL") + } + if parsedURL.Hostname() == "" { + return errors.New("Please enter a valid http(s) feed URL") + } + + ips, err := net.LookupIP(parsedURL.Hostname()) + if err != nil { + return fmt.Errorf("Unable to resolve this URL: %w", err) + } + for _, ip := range ips { + if ip.IsLoopback() || ip.IsPrivate() { + return fmt.Errorf("Access to private IP %s is forbidden", ip.String()) + } + } + + return nil +} + +func classifyFeedViewerError(err error) (int, string) { + msg := err.Error() + msg = strings.TrimPrefix(msg, "all items 
failed to process. last error: ") + + switch { + case strings.Contains(msg, "browserless service returned status"): + return http.StatusOK, humanizeBrowserlessStatus(msg) + case strings.Contains(msg, "http status not ok:"): + return http.StatusOK, humanizeFeedViewerHTTPStatus(msg) + case strings.Contains(msg, "http get failed:"), strings.Contains(msg, "browserless fetch failed:"), strings.Contains(msg, "failed to read response body:"), strings.Contains(msg, "Unable to resolve this URL"): + return http.StatusOK, "Unable to fetch this URL. Please check the address and try again." + case strings.Contains(msg, "parse failed:"), strings.Contains(msg, "invalid XML"): + return http.StatusOK, "The URL is reachable, but it does not appear to be a valid RSS or Atom feed." + case strings.Contains(msg, "not a valid craft name"): + return http.StatusBadRequest, "Please select a valid craft before comparing feeds." + default: + return http.StatusInternalServerError, "Failed to preview this feed due to an internal error." + } +} + +func humanizeBrowserlessStatus(msg string) string { + status := strings.TrimSpace(strings.TrimPrefix(msg, "browserless service returned status")) + if status == "" { + return "Browserless service failed to render the URL." + } + return fmt.Sprintf("Browserless service failed to render the URL (returned status %s). Please check the address or the browserless service.", status) +} + +func humanizeFeedViewerHTTPStatus(msg string) string { + status := strings.TrimSpace(strings.TrimPrefix(msg, "fetch failed: http status not ok:")) + if status == "" { + status = strings.TrimSpace(strings.TrimPrefix(msg, "http status not ok:")) + } + if status == "" { + return "Unable to fetch this URL. Please check the address and try again." 
+ } + return fmt.Sprintf("The source returned %s, so the feed could not be loaded.", status) +} diff --git a/internal/controller/html_to_rss.go b/internal/controller/html_to_rss.go index f5d2cf29..b3f84333 100644 --- a/internal/controller/html_to_rss.go +++ b/internal/controller/html_to_rss.go @@ -2,6 +2,7 @@ package controller import ( "FeedCraft/internal/craft" + fetcherpkg "FeedCraft/internal/source/fetcher" "FeedCraft/internal/util" "fmt" "net" @@ -67,19 +68,13 @@ func fetchHTML(targetURL string, useBrowserless bool) (string, error) { }) } - // Try standard HTTP request (simulating a browser user agent) client := resty.New() client.SetTimeout(craft.DefaultExtractFulltextTimeout) - resp, err := client.R(). - SetHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"). - SetHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"). - SetHeader("Accept-Language", "en-US,en;q=0.9"). - SetHeader("Upgrade-Insecure-Requests", "1"). - SetHeader("Sec-Fetch-Dest", "document"). - SetHeader("Sec-Fetch-Mode", "navigate"). - SetHeader("Sec-Fetch-Site", "none"). - SetHeader("Sec-Fetch-User", "?1"). 
- Get(targetURL) + req := client.R() + for key, value := range fetcherpkg.HTMLDefaultHeaders() { + req.SetHeader(key, value) + } + resp, err := req.Get(targetURL) if err != nil { return "", fmt.Errorf("fetch failed: %w", err) diff --git a/internal/craft/benchmark_test.go b/internal/craft/benchmark_test.go index 13a298a8..d987ae2a 100644 --- a/internal/craft/benchmark_test.go +++ b/internal/craft/benchmark_test.go @@ -2,24 +2,14 @@ package craft import ( "FeedCraft/internal/util" - "fmt" "testing" "time" - "github.com/alicebob/miniredis/v2" "github.com/mmcdole/gofeed" ) func BenchmarkProcessFeed_TranslateTitle_Cached(b *testing.B) { - // Setup Miniredis - s, err := miniredis.Run() - if err != nil { - b.Fatalf("Could not start miniredis: %s", err) - } - defer s.Close() - - // Set Environment Variables - b.Setenv("FC_REDIS_URI", fmt.Sprintf("redis://%s", s.Addr())) + redis := setupTestRedis(b) b.Setenv("FC_DEFAULT_TARGET_LANG", "zh-CN") // Setup DB Path @@ -32,23 +22,13 @@ func BenchmarkProcessFeed_TranslateTitle_Cached(b *testing.B) { // Pre-populate Cache itemTitle := "Hello World" - // Calculate MD5 - md5Hash := util.GetMD5Hash(itemTitle) - - // Key format: web_content_translate title_ - // We use the internal helper to avoid hardcoding the format - cacheKey := getCraftCacheKey("translate title", md5Hash) - - // Set in Miniredis - _ = s.Set(cacheKey, "你好世界") - // Set TTL just in case (CachedFunc sets it) - s.SetTTL(cacheKey, time.Hour) + hash := util.GetTextContentHash(itemTitle) + cacheKey := getCraftCacheKey("translate title", hash) + redis.SetString(b, cacheKey, "你好世界", time.Hour) b.ResetTimer() for i := 0; i < b.N; i++ { - b.StopTimer() // Setup per iteration - - // Reset feed item because craft might modify it in place + b.StopTimer() feed := &gofeed.Feed{ Items: []*gofeed.Item{ { @@ -59,7 +39,6 @@ func BenchmarkProcessFeed_TranslateTitle_Cached(b *testing.B) { }, }, } - b.StartTimer() _, err := ProcessFeed(feed, feedURL, craftName) diff --git 
a/internal/craft/content_processors.go b/internal/craft/content_processors.go index 293dbed5..235ca006 100644 --- a/internal/craft/content_processors.go +++ b/internal/craft/content_processors.go @@ -193,12 +193,12 @@ func cacheKeyForCraftArticleContent(article *model.CraftArticle) (string, error) if strings.TrimSpace(content) == "" { content = article.Description } - return util.GetMD5Hash(content), nil + return util.GetTextContentHash(content), nil } func cacheKeyForCraftArticleLink(article *model.CraftArticle) (string, error) { uniqLinkStr := article.Title + article.Id + article.Link - return util.GetMD5Hash(uniqLinkStr), nil + return util.GetTextContentHash(uniqLinkStr), nil } func applyRelativeLinkFix(ctx context.Context, feed *model.CraftFeed, originalFeedURL string) (*model.CraftFeed, error) { diff --git a/internal/craft/guid.go b/internal/craft/guid.go index aa2e4982..750b70f5 100644 --- a/internal/craft/guid.go +++ b/internal/craft/guid.go @@ -2,7 +2,6 @@ package craft import ( "FeedCraft/internal/util" - "fmt" "github.com/google/uuid" "github.com/gorilla/feeds" @@ -28,8 +27,7 @@ func feedItemGuidGenerator(item *feeds.Item) (string, error) { } combinedInput := item.Title + item.Content + item.Description - hash := util.GetMD5Hash(combinedInput) - return fmt.Sprintf("%x", hash), nil + return util.GetTextContentHash(combinedInput), nil } func GetGuidCraftOptions() []CraftOption { diff --git a/internal/craft/llm_processors.go b/internal/craft/llm_processors.go index 5ce166e1..c4fe2a81 100644 --- a/internal/craft/llm_processors.go +++ b/internal/craft/llm_processors.go @@ -325,9 +325,9 @@ func GetCommonCachedArticlePredicate(cacheKeyGenerator ArticleCacheKeyGenerator, } func newArticleTitleContentCacheKeyGenerator(prompt string) ArticleCacheKeyGenerator { - promptHash := util.GetMD5Hash(prompt) + promptHash := util.GetTextContentHash(prompt) return func(article *model.CraftArticle) (string, error) { - payloadHash := util.GetMD5Hash(strings.Join([]string{ + 
payloadHash := util.GetTextContentHash(strings.Join([]string{ promptHash, strings.TrimSpace(article.Title), strings.TrimSpace(getPrimaryArticleContent(article)), diff --git a/internal/craft/option.go b/internal/craft/option.go index 39825791..829c0284 100644 --- a/internal/craft/option.go +++ b/internal/craft/option.go @@ -1,14 +1,18 @@ package craft import ( + "FeedCraft/internal/config" + "FeedCraft/internal/source/fetcher" + "bytes" + "context" "fmt" "github.com/gorilla/feeds" "github.com/mmcdole/gofeed" "github.com/samber/lo" "github.com/samber/lo/parallel" "github.com/sirupsen/logrus" - "strings" + "time" ) type CraftedFeed struct { @@ -26,8 +30,19 @@ type CraftOption func(*feeds.Feed, ExtraPayload) error func NewCraftedFeedFromUrl(feedUrl string, options ...CraftOption) (CraftedFeed, error) { ingredient := CraftedFeed{originalFeedUrl: feedUrl} + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + raw, err := (&fetcher.HttpFetcher{Config: &config.HttpFetcherConfig{ + URL: feedUrl, + Purpose: config.HttpFetcherPurposeFeed, + }}).Fetch(ctx) + if err != nil { + return ingredient, err + } + fp := gofeed.NewParser() - parsedFeed, err := fp.ParseURL(feedUrl) + parsedFeed, err := fp.Parse(bytes.NewReader(raw)) if err != nil { return ingredient, err } diff --git a/internal/craft/option_fetch_test.go b/internal/craft/option_fetch_test.go new file mode 100644 index 00000000..20bc70c0 --- /dev/null +++ b/internal/craft/option_fetch_test.go @@ -0,0 +1,40 @@ +package craft + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestNewCraftedFeedFromUrlUsesFeedFetcherUserAgent(t *testing.T) { + var gotUA string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotUA = r.Header.Get("User-Agent") + w.Header().Set("Content-Type", "application/rss+xml") + _, _ = w.Write([]byte(` + + + Feed + https://example.com/ + Test feed + + Item 1 + https://example.com/item-1 + Hello + + 
+`)) + })) + defer server.Close() + + crafted, err := NewCraftedFeedFromUrl(server.URL) + if err != nil { + t.Fatalf("NewCraftedFeedFromUrl returned error: %v", err) + } + if crafted.OutputFeed == nil { + t.Fatal("expected output feed") + } + if gotUA != "FeedCraft/2.0" { + t.Fatalf("expected feed fetcher user agent, got %q", gotUA) + } +} diff --git a/internal/craft/runtime.go b/internal/craft/runtime.go index 905f4178..68453518 100644 --- a/internal/craft/runtime.go +++ b/internal/craft/runtime.go @@ -178,7 +178,7 @@ func (p *GUIDFixProcessor) Process(ctx context.Context, feed *model.CraftFeed) ( article.Id = article.Link continue } - article.Id = fmt.Sprintf("%x", util.GetMD5Hash(article.Title+article.Content+article.Description)) + article.Id = util.GetTextContentHash(article.Title + article.Content + article.Description) } return cloned, nil } diff --git a/internal/craft/runtime_test.go b/internal/craft/runtime_test.go index a69f533b..2bb55ce9 100644 --- a/internal/craft/runtime_test.go +++ b/internal/craft/runtime_test.go @@ -158,6 +158,8 @@ func TestNativeProcessors_EndToEnd(t *testing.T) { } func TestCleanupProcessor_UsesDescriptionFallback(t *testing.T) { + setupTestRedis(t) + original := cleanupTransformFunc cleanupTransformFunc = func(content string, domain string) (string, error) { return fmt.Sprintf("%s|%s", domain, content), nil @@ -168,8 +170,8 @@ func TestCleanupProcessor_UsesDescriptionFallback(t *testing.T) { feed := &model.CraftFeed{ Articles: []*model.CraftArticle{ { - Title: "article", - Link: "https://example.com/post", + Title: "article-" + t.Name(), + Link: "https://example.com/post/" + t.Name(), Description: "

fallback

", }, }, @@ -184,6 +186,8 @@ func TestCleanupProcessor_UsesDescriptionFallback(t *testing.T) { } func TestFulltextProcessor_PartialFailureAndRelativeLinkFix(t *testing.T) { + setupTestRedis(t) + original := fulltextExtractFunc fulltextExtractFunc = func(url string, timeout time.Duration) (string, error) { if url == "https://example.com/fail" { @@ -197,8 +201,8 @@ func TestFulltextProcessor_PartialFailureAndRelativeLinkFix(t *testing.T) { feed := &model.CraftFeed{ Link: "https://example.com", Articles: []*model.CraftArticle{ - {Title: "ok", Link: "/ok"}, - {Title: "bad", Link: "/fail"}, + {Title: "ok-" + t.Name(), Link: "/ok"}, + {Title: "bad-" + t.Name(), Link: "/fail"}, }, } @@ -213,6 +217,8 @@ func TestFulltextProcessor_PartialFailureAndRelativeLinkFix(t *testing.T) { } func TestFulltextProcessor_AllFailureReturnsError(t *testing.T) { + setupTestRedis(t) + original := fulltextExtractFunc fulltextExtractFunc = func(url string, timeout time.Duration) (string, error) { return "", fmt.Errorf("always fail") @@ -223,8 +229,8 @@ func TestFulltextProcessor_AllFailureReturnsError(t *testing.T) { feed := &model.CraftFeed{ Link: "https://example.com", Articles: []*model.CraftArticle{ - {Title: "a", Link: "/a"}, - {Title: "b", Link: "/b"}, + {Title: "a-" + t.Name(), Link: "/a"}, + {Title: "b-" + t.Name(), Link: "/b"}, }, } @@ -234,6 +240,8 @@ func TestFulltextProcessor_AllFailureReturnsError(t *testing.T) { } func TestFulltextPlusProcessor_UsesConfiguredOptions(t *testing.T) { + setupTestRedis(t) + original := fulltextPlusExtractFunc var capturedURL string var capturedOptions util.BrowserlessOptions @@ -274,6 +282,8 @@ func TestBuildLLMArticlePayload_IncludesTitleAndContent(t *testing.T) { } func TestSummaryProcessor_UsesDescriptionFallback(t *testing.T) { + setupTestRedis(t) + original := llmContextCaller llmContextCaller = func(prompt, context string, option util.ContentProcessOption) (string, error) { assert.Contains(t, context, "Article Title:") @@ -286,8 +296,8 @@ func 
TestSummaryProcessor_UsesDescriptionFallback(t *testing.T) { feed := &model.CraftFeed{ Articles: []*model.CraftArticle{ { - Title: "summary article", - Link: "https://example.com/post", + Title: "summary article " + t.Name(), + Link: "https://example.com/post/" + t.Name(), Description: "

fallback body

", }, }, @@ -301,6 +311,8 @@ func TestSummaryProcessor_UsesDescriptionFallback(t *testing.T) { } func TestTranslateTitleProcessor_UsesNativeLLMFlow(t *testing.T) { + setupTestRedis(t) + original := llmContextCaller llmContextCaller = func(prompt, context string, option util.ContentProcessOption) (string, error) { assert.Contains(t, context, "Original Title") @@ -311,17 +323,19 @@ func TestTranslateTitleProcessor_UsesNativeLLMFlow(t *testing.T) { processor := newTranslateTitleProcessor("translate prompt " + t.Name()) feed := &model.CraftFeed{ Articles: []*model.CraftArticle{ - {Title: "Original Title"}, + {Title: "Original Title " + t.Name()}, }, } result, err := processor.Process(context.Background(), feed) require.NoError(t, err) assert.Equal(t, "Translated Title", result.Articles[0].Title) - assert.Equal(t, "Original Title", feed.Articles[0].Title) + assert.Equal(t, "Original Title "+t.Name(), feed.Articles[0].Title) } func TestBeautifyContentProcessor_WritesHTML(t *testing.T) { + setupTestRedis(t) + original := llmCaller llmCaller = func(model string, promptInput string) (string, error) { assert.Contains(t, promptInput, "

Body

") @@ -332,7 +346,7 @@ func TestBeautifyContentProcessor_WritesHTML(t *testing.T) { processor := newBeautifyContentProcessor("beautify prompt " + t.Name()) feed := &model.CraftFeed{ Articles: []*model.CraftArticle{ - {Title: "beautify", Content: "

Body

"}, + {Title: "beautify-" + t.Name(), Content: "

Body

"}, }, } @@ -343,6 +357,8 @@ func TestBeautifyContentProcessor_WritesHTML(t *testing.T) { } func TestLLMFilterProcessor_RemovesMatchedArticleAndUsesTitleContentPayload(t *testing.T) { + setupTestRedis(t) + original := llmContextCaller var seen []string llmContextCaller = func(prompt, context string, option util.ContentProcessOption) (string, error) { diff --git a/internal/craft/test_redis_helper_test.go b/internal/craft/test_redis_helper_test.go new file mode 100644 index 00000000..52ffd91a --- /dev/null +++ b/internal/craft/test_redis_helper_test.go @@ -0,0 +1,42 @@ +package craft + +import ( + "fmt" + "testing" + "time" + + "github.com/alicebob/miniredis/v2" +) + +type testRedisServer struct { + server *miniredis.Miniredis +} + +func setupTestRedis(tb testing.TB) *testRedisServer { + tb.Helper() + + server, err := miniredis.Run() + if err != nil { + tb.Fatalf("could not start miniredis: %v", err) + } + + tb.Setenv("FC_REDIS_URI", fmt.Sprintf("redis://%s", server.Addr())) + tb.Cleanup(server.Close) + + return &testRedisServer{server: server} +} + +func (r *testRedisServer) FlushAll(tb testing.TB) { + tb.Helper() + r.server.FlushAll() +} + +func (r *testRedisServer) SetString(tb testing.TB, key string, value string, ttl time.Duration) { + tb.Helper() + if err := r.server.Set(key, value); err != nil { + tb.Fatalf("could not seed redis key %q: %v", key, err) + } + if ttl > 0 { + r.server.SetTTL(key, ttl) + } +} diff --git a/internal/craft/translate.go b/internal/craft/translate.go index ae7c6bb4..6a6e2988 100644 --- a/internal/craft/translate.go +++ b/internal/craft/translate.go @@ -35,10 +35,10 @@ func translateArticleContent(content string, prompt string) (string, error) { type ContentCacheKeyGenerator TransFunc func cacheKeyForArticleTitle(item *feeds.Item) (string, error) { - return util.GetMD5Hash(item.Title), nil + return util.GetTextContentHash(item.Title), nil } func cacheKeyForArticleContent(item *feeds.Item) (string, error) { - return 
util.GetMD5Hash(item.Description), nil + return util.GetTextContentHash(item.Description), nil } func cacheKeyForArticleLink(item *feeds.Item) (string, error) { uniqLinkStr := item.Title @@ -48,7 +48,7 @@ func cacheKeyForArticleLink(item *feeds.Item) (string, error) { } else if item.Source != nil { uniqLinkStr += item.Source.Href } - return util.GetMD5Hash(uniqLinkStr), nil + return util.GetTextContentHash(uniqLinkStr), nil } // ======================================= diff --git a/internal/dao/migrate.go b/internal/dao/migrate.go index 017a6ed4..cc6c0580 100644 --- a/internal/dao/migrate.go +++ b/internal/dao/migrate.go @@ -105,7 +105,7 @@ var defaultAdminUser = User{ } func createAdminUser(db *gorm.DB) { - md5Password := util.GetMD5Hash(defaultPassword) + md5Password := util.GetPasswordMD5Hash(defaultPassword) // 检查是否已经存在 admin 用户 var user User @@ -128,6 +128,6 @@ func createAdminUser(db *gorm.DB) { func ResetAdminPassword() error { logrus.Info("resetting admin password...") db := util.GetDatabase() - md5Password := util.GetMD5Hash(defaultPassword) + md5Password := util.GetPasswordMD5Hash(defaultPassword) return UpdateUserPassword(db, &defaultAdminUser, md5Password) } diff --git a/internal/dao/recipe.go b/internal/dao/recipe.go index 98913202..9b091855 100644 --- a/internal/dao/recipe.go +++ b/internal/dao/recipe.go @@ -85,7 +85,14 @@ func DeleteCustomRecipe(db *gorm.DB, id string) error { // DeleteCustomRecipeV2 deletes a CustomRecipeV2 record by its ID func DeleteCustomRecipeV2(db *gorm.DB, id string) error { var recipe CustomRecipeV2 - return db.Where("id = ?", id).Delete(&recipe).Error + result := db.Where("id = ?", id).Delete(&recipe) + if result.Error != nil { + return result.Error + } + if result.RowsAffected == 0 { + return gorm.ErrRecordNotFound + } + return nil } func ListCustomRecipe(db *gorm.DB) ([]*CustomRecipe, error) { diff --git a/internal/engine/topic.go b/internal/engine/topic.go index f306612a..c6f036e6 100644 --- a/internal/engine/topic.go +++ 
b/internal/engine/topic.go @@ -74,10 +74,9 @@ func (t *TopicFeed) Fetch(ctx context.Context) (*model.CraftFeed, error) { Title: t.Title, Description: t.Description, Link: t.Link, - Updated: time.Now(), - Created: time.Now(), Articles: allArticles, } + applyTopicFeedTimestamps(mergedFeed) // If there's an aggregator pipeline (e.g., deduplicate -> sort -> limit), run it. if t.Aggregator != nil { @@ -99,6 +98,7 @@ func (t *TopicFeed) Fetch(ctx context.Context) (*model.CraftFeed, error) { }) return nil, err } + applyTopicFeedTimestamps(processedFeed) if len(processedFeed.Articles) == 0 && len(failedInputs) > 0 { reportTopicResult(ctx, t, processedFeed, failedInputs, startedAt) return nil, errors.New("topic failed because all upstream providers failed or produced no items") @@ -116,6 +116,34 @@ func (t *TopicFeed) Fetch(ctx context.Context) (*model.CraftFeed, error) { return mergedFeed, nil } +func applyTopicFeedTimestamps(feed *model.CraftFeed) { + if feed == nil { + return + } + + var latestUpdated time.Time + var latestCreated time.Time + + for _, article := range feed.Articles { + if article == nil { + continue + } + if article.Updated.After(latestUpdated) { + latestUpdated = article.Updated + } + if article.Created.After(latestCreated) { + latestCreated = article.Created + } + } + + if !latestUpdated.IsZero() { + feed.Updated = latestUpdated + } + if !latestCreated.IsZero() { + feed.Created = latestCreated + } +} + func reportTopicResult(ctx context.Context, topic *TopicFeed, feed *model.CraftFeed, failedInputs []map[string]any, startedAt time.Time) { status := dao.ExecutionStatusSuccess errorKind := "" diff --git a/internal/engine/topic_test.go b/internal/engine/topic_test.go index d543c806..9782aedd 100644 --- a/internal/engine/topic_test.go +++ b/internal/engine/topic_test.go @@ -25,18 +25,25 @@ func (m *MockProvider) Fetch(ctx context.Context) (*model.CraftFeed, error) { } func TestTopicFeed_Fetch_Success(t *testing.T) { + updated1 := time.Date(2026, 4, 1, 10, 
0, 0, 0, time.UTC) + created1 := time.Date(2026, 4, 1, 9, 0, 0, 0, time.UTC) + updated2 := time.Date(2026, 4, 2, 11, 0, 0, 0, time.UTC) + created2 := time.Date(2026, 4, 2, 8, 0, 0, 0, time.UTC) + updated3 := time.Date(2026, 4, 3, 12, 0, 0, 0, time.UTC) + created3 := time.Date(2026, 4, 3, 7, 0, 0, 0, time.UTC) + provider1 := &MockProvider{ Feed: &model.CraftFeed{ Articles: []*model.CraftArticle{ - {Id: "1", Title: "Article 1"}, - {Id: "2", Title: "Article 2"}, + {Id: "1", Title: "Article 1", Updated: updated1, Created: created1}, + {Id: "2", Title: "Article 2", Updated: updated2, Created: created2}, }, }, } provider2 := &MockProvider{ Feed: &model.CraftFeed{ Articles: []*model.CraftArticle{ - {Id: "3", Title: "Article 3"}, + {Id: "3", Title: "Article 3", Updated: updated3, Created: created3}, }, }, } @@ -53,6 +60,8 @@ func TestTopicFeed_Fetch_Success(t *testing.T) { // Should contain 3 articles total assert.Len(t, result.Articles, 3) + assert.True(t, result.Updated.Equal(updated3)) + assert.True(t, result.Created.Equal(created3)) } func TestTopicFeed_Fetch_PartialFailure(t *testing.T) { @@ -83,12 +92,19 @@ func TestTopicFeed_Fetch_PartialFailure(t *testing.T) { } func TestTopicFeed_Fetch_WithAggregator(t *testing.T) { + updated1 := time.Date(2026, 4, 1, 10, 0, 0, 0, time.UTC) + updated2 := time.Date(2026, 4, 2, 10, 0, 0, 0, time.UTC) + updated3 := time.Date(2026, 4, 3, 10, 0, 0, 0, time.UTC) + created1 := time.Date(2026, 4, 1, 9, 0, 0, 0, time.UTC) + created2 := time.Date(2026, 4, 2, 9, 0, 0, 0, time.UTC) + created3 := time.Date(2026, 4, 3, 9, 0, 0, 0, time.UTC) + provider := &MockProvider{ Feed: &model.CraftFeed{ Articles: []*model.CraftArticle{ - {Id: "1", Title: "A"}, - {Id: "2", Title: "B"}, - {Id: "3", Title: "C"}, + {Id: "1", Title: "A", Updated: updated1, Created: created1}, + {Id: "2", Title: "B", Updated: updated2, Created: created2}, + {Id: "3", Title: "C", Updated: updated3, Created: created3}, }, }, } @@ -105,6 +121,8 @@ func 
TestTopicFeed_Fetch_WithAggregator(t *testing.T) { assert.NoError(t, err) assert.NotNil(t, result) assert.Len(t, result.Articles, 2) + assert.True(t, result.Updated.Equal(updated2)) + assert.True(t, result.Created.Equal(created2)) } func TestTopicFeed_Fetch_AllInputsFailed(t *testing.T) { diff --git a/internal/feedruntime/builder.go b/internal/feedruntime/builder.go index bfa3fc2a..7235f180 100644 --- a/internal/feedruntime/builder.go +++ b/internal/feedruntime/builder.go @@ -173,7 +173,6 @@ func (b *Builder) BuildRecipe(ctx context.Context, recipeData *dao.CustomRecipeV if err != nil { return nil, err } - provider, err := b.BuildProviderFromInput(ctx, inputSpec, nil) if err != nil { return nil, err diff --git a/internal/feedruntime/builder_test.go b/internal/feedruntime/builder_test.go index 2bde5dd0..7b238faa 100644 --- a/internal/feedruntime/builder_test.go +++ b/internal/feedruntime/builder_test.go @@ -3,6 +3,9 @@ package feedruntime import ( "context" "errors" + "io" + "net/http" + "net/http/httptest" "testing" "time" @@ -143,14 +146,46 @@ func TestBuildRecipe_UsesSourceInputSpecCompatibility(t *testing.T) { assert.Equal(t, "stub-feed", feed.Title) } -func TestBuildProviderFromInput_InvalidURI(t *testing.T) { - builder := NewBuilder(newTestDB(t)) - _, err := builder.BuildProviderFromInput(context.Background(), InputSpec{ - Kind: InputKindURI, - URI: "feedcraft://recipe", - }, nil) - require.Error(t, err) - assert.Contains(t, err.Error(), "missing resource id") +func TestProxyRecipeFetch_UsesDefaultUserAgent(t *testing.T) { + var gotUA string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotUA = r.Header.Get("User-Agent") + w.Header().Set("Content-Type", "application/rss+xml") + _, _ = io.WriteString(w, ` + + + Proxy Feed + https://example.com/ + Proxy test feed + + Item 1 + https://example.com/item-1 + Hello + + +`) + })) + defer server.Close() + + db := newTestDB(t) + require.NoError(t, 
db.Create(&dao.CustomRecipeV2{ + ID: "proxy-runtime-default-ua", + Craft: "proxy", + SourceType: string(constant.SourceRSS), + SourceConfig: `{ + "type":"rss", + "http_fetcher":{"url":"` + server.URL + `"} + }`, + }).Error) + + builder := NewBuilder(db) + provider, err := builder.BuildRecipeProvider(context.Background(), "proxy-runtime-default-ua") + require.NoError(t, err) + + feed, err := provider.Fetch(context.Background()) + require.NoError(t, err) + require.NotNil(t, feed) + assert.Equal(t, "FeedCraft/2.0", gotUA) } func TestBuildAggregator(t *testing.T) { @@ -235,7 +270,7 @@ func newTestDB(t *testing.T) *gorm.DB { dsn := "file:" + t.Name() + "?mode=memory&cache=shared" db, err := gorm.Open(sqlite.Open(dsn), &gorm.Config{}) require.NoError(t, err) - require.NoError(t, db.AutoMigrate(&dao.CustomRecipeV2{}, &dao.TopicFeed{})) + require.NoError(t, db.AutoMigrate(&dao.CustomRecipeV2{}, &dao.TopicFeed{}, &dao.CraftAtom{})) return db } diff --git a/internal/router/registry.go b/internal/router/registry.go index 38ae0e92..5dd623ee 100644 --- a/internal/router/registry.go +++ b/internal/router/registry.go @@ -35,6 +35,7 @@ func RegisterRouters(router *gin.Engine) { router.LoadHTMLFiles("web/index.html") router.LoadHTMLFiles("web/start.html") router.StaticFile("/start.html", "web/start.html") + router.StaticFile("/favicon.ico", "web/favicon.ico") //router.GET("/start.html", func(c *gin.Context) { // c.HTML(http.StatusOK, "start.html", gin.H{ // "SiteBaseUrl": siteBaseUrl, @@ -123,6 +124,7 @@ func RegisterRouters(router *gin.Engine) { adminApi.POST("/tools/fetch", controller.HtmlFetch) adminApi.POST("/tools/parse", controller.HtmlParse) + adminApi.GET("/tools/feed/preview", controller.PreviewFeedViewer) adminApi.POST("/tools/json/fetch", controller.CurlFetch) adminApi.POST("/tools/json/parse", controller.CurlParse) diff --git a/internal/source/fetcher/http_fetcher.go b/internal/source/fetcher/http_fetcher.go index f1e4e308..df2af84b 100644 --- 
a/internal/source/fetcher/http_fetcher.go +++ b/internal/source/fetcher/http_fetcher.go @@ -4,13 +4,23 @@ import ( "FeedCraft/internal/config" "FeedCraft/internal/util" "context" + "errors" "fmt" "io" "net/http" "strings" "time" + + retry "github.com/avast/retry-go/v4" ) +const MaxResponseBodySize = 10 * 1024 * 1024 // 10MB + +type requestProfile struct { + defaultHeaders map[string]string + retryAttempts uint +} + // HttpFetcher is a simple fetcher based on http.Get. type HttpFetcher struct { Config *config.HttpFetcherConfig @@ -31,9 +41,35 @@ func (f *HttpFetcher) Fetch(ctx context.Context) ([]byte, error) { return []byte(content), nil } + profile := resolveRequestProfile(f.Config) + var body []byte + err := retry.Do( + func() error { + result, err := f.doRequest(ctx, profile) + if err != nil { + return err + } + body = result + return nil + }, + retry.Context(ctx), + retry.Attempts(profile.retryAttempts), + retry.Delay(300*time.Millisecond), + retry.DelayType(retry.FixedDelay), + retry.RetryIf(isRetryableFetchError), + retry.LastErrorOnly(true), + ) + if err != nil { + return nil, err + } + + return body, nil +} + +func (f *HttpFetcher) doRequest(ctx context.Context, profile requestProfile) ([]byte, error) { method := f.Config.Method if method == "" { - method = "GET" + method = http.MethodGet } var bodyReader io.Reader @@ -46,27 +82,32 @@ func (f *HttpFetcher) Fetch(ctx context.Context) ([]byte, error) { return nil, fmt.Errorf("failed to create request: %w", err) } - // It is good practice to set a user-agent. 
- req.Header.Set("User-Agent", "FeedCraft/2.0") - - // Set custom headers from config + for key, value := range profile.defaultHeaders { + req.Header.Set(key, value) + } for key, value := range f.Config.Headers { req.Header.Set(key, value) } resp, err := http.DefaultClient.Do(req) if err != nil { - return nil, fmt.Errorf("http get failed: %w", err) + return nil, &fetchError{err: fmt.Errorf("http get failed: %w", err), retryable: true} } defer func() { _ = resp.Body.Close() }() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("http status not ok: %s", resp.Status) + return nil, &fetchError{ + err: fmt.Errorf("http status not ok: %s", resp.Status), + retryable: isRetryableStatus(resp.StatusCode), + } } - body, err := io.ReadAll(resp.Body) + body, err := io.ReadAll(io.LimitReader(resp.Body, MaxResponseBodySize+1)) if err != nil { - return nil, fmt.Errorf("failed to read response body: %w", err) + return nil, &fetchError{err: fmt.Errorf("failed to read response body: %w", err), retryable: true} + } + if len(body) > MaxResponseBodySize { + return nil, &fetchError{err: fmt.Errorf("response body exceeds the maximum size limit of %d bytes", MaxResponseBodySize), retryable: false} } return body, nil @@ -78,3 +119,57 @@ func (f *HttpFetcher) BaseURL() string { } return f.Config.URL } + +func resolveRequestProfile(cfg *config.HttpFetcherConfig) requestProfile { + if cfg != nil && cfg.Purpose == config.HttpFetcherPurposeHTML { + return requestProfile{ + defaultHeaders: HTMLDefaultHeaders(), + retryAttempts: 3, + } + } + + return requestProfile{ + defaultHeaders: map[string]string{ + "User-Agent": util.DefaultFeedUserAgent(), + }, + retryAttempts: 1, + } +} + +func HTMLDefaultHeaders() map[string]string { + return map[string]string{ + "User-Agent": util.DefaultHTMLUserAgent(), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", + "Accept-Language": "en-US,en;q=0.9", 
+ "Upgrade-Insecure-Requests": "1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + } +} + +type fetchError struct { + err error + retryable bool +} + +func (e *fetchError) Error() string { + return e.err.Error() +} + +func (e *fetchError) Unwrap() error { + return e.err +} + +func isRetryableFetchError(err error) bool { + var fetchErr *fetchError + if !errors.As(err, &fetchErr) { + return false + } + return fetchErr.retryable +} + +func isRetryableStatus(statusCode int) bool { + return statusCode == http.StatusTooManyRequests || statusCode >= http.StatusInternalServerError +} diff --git a/internal/source/fetcher/http_fetcher_test.go b/internal/source/fetcher/http_fetcher_test.go new file mode 100644 index 00000000..5f5ffad3 --- /dev/null +++ b/internal/source/fetcher/http_fetcher_test.go @@ -0,0 +1,123 @@ +package fetcher + +import ( + "FeedCraft/internal/config" + "FeedCraft/internal/util" + "context" + "io" + "net/http" + "net/http/httptest" + "testing" +) + +func TestHttpFetcherUsesDefaultFeedUserAgent(t *testing.T) { + var gotUA string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotUA = r.Header.Get("User-Agent") + _, _ = io.WriteString(w, "ok") + })) + defer server.Close() + + fetcher := &HttpFetcher{Config: &config.HttpFetcherConfig{URL: server.URL}} + _, err := fetcher.Fetch(context.Background()) + if err != nil { + t.Fatalf("Fetch returned error: %v", err) + } + if gotUA != util.DefaultFeedUserAgent() { + t.Fatalf("expected default feed user agent, got %q", gotUA) + } +} + +func TestHttpFetcherAllowsHeaderOverride(t *testing.T) { + var gotUA string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotUA = r.Header.Get("User-Agent") + _, _ = io.WriteString(w, "ok") + })) + defer server.Close() + + fetcher := &HttpFetcher{Config: &config.HttpFetcherConfig{ + URL: server.URL, + Headers: 
map[string]string{ + "User-Agent": "SourceSpecific/1.2.3", + }, + }} + _, err := fetcher.Fetch(context.Background()) + if err != nil { + t.Fatalf("Fetch returned error: %v", err) + } + if gotUA != "SourceSpecific/1.2.3" { + t.Fatalf("expected source header override, got %q", gotUA) + } +} + +func TestHttpFetcherHTMLPurposeUsesDefaultHTMLUserAgent(t *testing.T) { + var gotUA, gotAccept string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotUA = r.Header.Get("User-Agent") + gotAccept = r.Header.Get("Accept") + _, _ = io.WriteString(w, "ok") + })) + defer server.Close() + + fetcher := &HttpFetcher{Config: &config.HttpFetcherConfig{ + URL: server.URL, + Purpose: config.HttpFetcherPurposeHTML, + }} + _, err := fetcher.Fetch(context.Background()) + if err != nil { + t.Fatalf("Fetch returned error: %v", err) + } + if gotUA != util.DefaultHTMLUserAgent() { + t.Fatalf("expected html default user agent, got %q", gotUA) + } + if gotAccept == "" { + t.Fatal("expected html accept header to be set") + } +} + +func TestHttpFetcherHTMLPurposeRetriesRetryableStatus(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + if attempts < 3 { + w.WriteHeader(http.StatusTooManyRequests) + return + } + _, _ = io.WriteString(w, "ok") + })) + defer server.Close() + + fetcher := &HttpFetcher{Config: &config.HttpFetcherConfig{ + URL: server.URL, + Purpose: config.HttpFetcherPurposeHTML, + }} + _, err := fetcher.Fetch(context.Background()) + if err != nil { + t.Fatalf("Fetch returned error: %v", err) + } + if attempts != 3 { + t.Fatalf("expected 3 attempts, got %d", attempts) + } +} + +func TestHttpFetcherFeedPurposeDoesNotRetryOnRetryableStatus(t *testing.T) { + attempts := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts++ + w.WriteHeader(http.StatusTooManyRequests) + })) + defer server.Close() + + 
fetcher := &HttpFetcher{Config: &config.HttpFetcherConfig{ + URL: server.URL, + Purpose: config.HttpFetcherPurposeFeed, + }} + _, err := fetcher.Fetch(context.Background()) + if err == nil { + t.Fatal("expected fetch to fail") + } + if attempts != 1 { + t.Fatalf("expected 1 attempt, got %d", attempts) + } +} diff --git a/internal/source/fetcher/provider/litellm.go b/internal/source/fetcher/provider/litellm.go index 9aa3ee49..899b5e0d 100644 --- a/internal/source/fetcher/provider/litellm.go +++ b/internal/source/fetcher/provider/litellm.go @@ -2,6 +2,7 @@ package provider import ( "FeedCraft/internal/config" + "FeedCraft/internal/util" "context" "fmt" "strings" @@ -55,6 +56,8 @@ func (p *LiteLLMProvider) Fetch(ctx context.Context, query string) ([]byte, erro req := p.Client.R(). SetContext(ctx). + // NOTE: search provider requests are temporarily grouped under feed UA rules. + SetHeader("User-Agent", util.DefaultFeedUserAgent()). SetBody(reqBody) if p.Config.APIKey != "" { diff --git a/internal/source/fetcher/provider/litellm_test.go b/internal/source/fetcher/provider/litellm_test.go index 7b807cf5..bfb385a8 100644 --- a/internal/source/fetcher/provider/litellm_test.go +++ b/internal/source/fetcher/provider/litellm_test.go @@ -3,6 +3,7 @@ package provider import ( "FeedCraft/internal/config" "FeedCraft/internal/source/parser" + "FeedCraft/internal/util" "context" "net/http" "net/http/httptest" @@ -12,7 +13,6 @@ import ( ) func TestLiteLLMProvider_EndToEnd(t *testing.T) { - // Standard API response from LiteLLM (often similar to OpenAI/Bing search results) mockResponse := `{ "results": [ { @@ -31,6 +31,7 @@ func TestLiteLLMProvider_EndToEnd(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { assert.Equal(t, "/", r.URL.Path) assert.Equal(t, "POST", r.Method) + assert.Equal(t, util.DefaultFeedUserAgent(), r.Header.Get("User-Agent")) w.Header().Set("Content-Type", "application/json") _, _ = 
w.Write([]byte(mockResponse)) })) @@ -41,29 +42,20 @@ func TestLiteLLMProvider_EndToEnd(t *testing.T) { } provider := NewLiteLLMProvider(cfg) - - // 1. Fetch the data data, err := provider.Fetch(context.Background(), "test litellm query") assert.NoError(t, err) assert.NotNil(t, data) - // 2. Parse the data using the default parser configuration parserConfig := provider.GetDefaultParserConfig() jsonParser := &parser.JsonParser{Config: parserConfig} feed, err := jsonParser.Parse(data) assert.NoError(t, err) assert.NotNil(t, feed) - - // 3. Verify the parsed feed items assert.Len(t, feed.Articles, 2) - - // First item verification assert.Equal(t, "LiteLLM Title 1", feed.Articles[0].Title) assert.Equal(t, "https://example.com/litellm1", feed.Articles[0].Link) assert.Equal(t, "This is a snippet for the first result.", feed.Articles[0].Description) - - // Second item verification assert.Equal(t, "LiteLLM Title 2", feed.Articles[1].Title) assert.Equal(t, "https://example.com/litellm2", feed.Articles[1].Link) assert.Equal(t, "Snippet for the second LiteLLM search result.", feed.Articles[1].Description) diff --git a/internal/source/fetcher/provider/searxng.go b/internal/source/fetcher/provider/searxng.go index eb79ec50..7ae32331 100644 --- a/internal/source/fetcher/provider/searxng.go +++ b/internal/source/fetcher/provider/searxng.go @@ -2,6 +2,7 @@ package provider import ( "FeedCraft/internal/config" + "FeedCraft/internal/util" "context" "fmt" "net/url" @@ -53,7 +54,10 @@ func (p *SearXNGProvider) Fetch(ctx context.Context, query string) ([]byte, erro p.Client = resty.New().SetTimeout(10 * time.Second) } - req := p.Client.R().SetContext(ctx) + req := p.Client.R(). + SetContext(ctx). + // NOTE: search provider requests are temporarily grouped under feed UA rules. 
+ SetHeader("User-Agent", util.DefaultFeedUserAgent()) // Add Authorization header if API Key is present (useful for private instances with Basic/Bearer auth) if p.Config.APIKey != "" { diff --git a/internal/source/fetcher/provider/searxng_test.go b/internal/source/fetcher/provider/searxng_test.go index cfa207d2..a5604463 100644 --- a/internal/source/fetcher/provider/searxng_test.go +++ b/internal/source/fetcher/provider/searxng_test.go @@ -3,6 +3,7 @@ package provider import ( "FeedCraft/internal/config" "FeedCraft/internal/source/parser" + "FeedCraft/internal/util" "context" "net/http" "net/http/httptest" @@ -12,7 +13,6 @@ import ( ) func TestSearXNGProvider_EndToEnd(t *testing.T) { - // Standard API response from SearXNG mockResponse := `{ "query": "test query", "number_of_results": 2, @@ -41,6 +41,7 @@ func TestSearXNGProvider_EndToEnd(t *testing.T) { assert.Equal(t, "/search", r.URL.Path) assert.Equal(t, "test query", r.URL.Query().Get("q")) assert.Equal(t, "json", r.URL.Query().Get("format")) + assert.Equal(t, util.DefaultFeedUserAgent(), r.Header.Get("User-Agent")) w.Header().Set("Content-Type", "application/json") _, _ = w.Write([]byte(mockResponse)) })) @@ -54,30 +55,21 @@ func TestSearXNGProvider_EndToEnd(t *testing.T) { } provider := NewSearXNGProvider(cfg) - - // 1. Fetch the data data, err := provider.Fetch(context.Background(), "test query") assert.NoError(t, err) assert.NotNil(t, data) - // 2. Parse the data using the default parser configuration parserConfig := provider.GetDefaultParserConfig() jsonParser := &parser.JsonParser{Config: parserConfig} feed, err := jsonParser.Parse(data) assert.NoError(t, err) assert.NotNil(t, feed) - - // 3. 
Verify the parsed feed items assert.Len(t, feed.Articles, 2) - - // First item verification assert.Equal(t, "Example Domain 1", feed.Articles[0].Title) assert.Equal(t, "https://example.com/1", feed.Articles[0].Link) assert.Equal(t, "This domain is for use in illustrative examples.", feed.Articles[0].Description) assert.False(t, feed.Articles[0].Created.IsZero()) - - // Second item verification (missing publishedDate) assert.Equal(t, "Example Domain 2", feed.Articles[1].Title) assert.Equal(t, "https://example.com/2", feed.Articles[1].Link) assert.Equal(t, "More illustrative examples here.", feed.Articles[1].Description) diff --git a/internal/source/html.go b/internal/source/html.go index 67c3d7db..6607600d 100644 --- a/internal/source/html.go +++ b/internal/source/html.go @@ -19,6 +19,9 @@ func htmlSourceFactory(cfg *config.SourceConfig) (Source, error) { if cfg.HtmlParser == nil { return nil, fmt.Errorf("html_parser config is required for html source") } + if cfg.HttpFetcher.Purpose == "" { + cfg.HttpFetcher.Purpose = config.HttpFetcherPurposeHTML + } return &PipelineSource{ Config: cfg, diff --git a/internal/source/json.go b/internal/source/json.go index c77297b2..18d5fada 100644 --- a/internal/source/json.go +++ b/internal/source/json.go @@ -19,6 +19,9 @@ func jsonSourceFactory(cfg *config.SourceConfig) (Source, error) { if cfg.JsonParser == nil { return nil, fmt.Errorf("json_parser config is required for json source") } + if cfg.HttpFetcher.Purpose == "" { + cfg.HttpFetcher.Purpose = config.HttpFetcherPurposeHTML + } return &PipelineSource{ Config: cfg, diff --git a/internal/source/parser/json_parser.go b/internal/source/parser/json_parser.go index fe214f76..d7700eba 100644 --- a/internal/source/parser/json_parser.go +++ b/internal/source/parser/json_parser.go @@ -3,6 +3,7 @@ package parser import ( "FeedCraft/internal/config" "FeedCraft/internal/model" + "bytes" "encoding/json" "fmt" ) @@ -17,7 +18,9 @@ func (p *JsonParser) Parse(data []byte) (*model.CraftFeed, 
error) { } var rawData interface{} - if err := json.Unmarshal(data, &rawData); err != nil { + decoder := json.NewDecoder(bytes.NewReader(data)) + decoder.UseNumber() + if err := decoder.Decode(&rawData); err != nil { return nil, fmt.Errorf("invalid json data: %w", err) } diff --git a/internal/source/parser/json_parser_test.go b/internal/source/parser/json_parser_test.go index 558a2d29..80fa4f28 100644 --- a/internal/source/parser/json_parser_test.go +++ b/internal/source/parser/json_parser_test.go @@ -111,6 +111,31 @@ func TestJsonParser_Parse_WithItemTemplateOnly(t *testing.T) { } } +func TestJsonParser_Parse_LargeNumberID(t *testing.T) { + jsonContent := `{ + "items": [ + { + "id": 12345678901234567890, + "title": "Entry" + } + ] + }` + + cfg := &config.JsonParserConfig{ + ItemsIterator: ".items[]", + Title: ".title", + LinkTemplate: "https://example.com/article/{{ .Item.id }}", + } + + parser := &JsonParser{Config: cfg} + feed, err := parser.Parse([]byte(jsonContent)) + + assert.NoError(t, err) + if assert.NotNil(t, feed) && assert.Len(t, feed.Articles, 1) { + assert.Equal(t, "https://example.com/article/12345678901234567890", feed.Articles[0].Link) + } +} + func TestJsonParser_Parse_Error(t *testing.T) { // Test case where field selector causes a runtime error in jq // e.g. 
trying to iterate a string diff --git a/internal/source/rss.go b/internal/source/rss.go index 9fdc9b96..e40da05c 100644 --- a/internal/source/rss.go +++ b/internal/source/rss.go @@ -16,6 +16,9 @@ func rssSourceFactory(cfg *config.SourceConfig) (Source, error) { if cfg.HttpFetcher == nil { return nil, fmt.Errorf("http_fetcher config is required for rss source") } + if cfg.HttpFetcher.Purpose == "" { + cfg.HttpFetcher.Purpose = config.HttpFetcherPurposeFeed + } return &PipelineSource{ Config: cfg, // Inject the full config diff --git a/internal/source/source_factory_test.go b/internal/source/source_factory_test.go new file mode 100644 index 00000000..9c992731 --- /dev/null +++ b/internal/source/source_factory_test.go @@ -0,0 +1,68 @@ +package source + +import ( + "FeedCraft/internal/config" + "FeedCraft/internal/constant" + "testing" +) + +func TestRSSSourceFactorySetsFeedPurposeByDefault(t *testing.T) { + cfg := &config.SourceConfig{ + Type: constant.SourceRSS, + HttpFetcher: &config.HttpFetcherConfig{ + URL: "https://example.com/feed.xml", + }, + } + + _, err := rssSourceFactory(cfg) + if err != nil { + t.Fatalf("rssSourceFactory returned error: %v", err) + } + if cfg.HttpFetcher.Purpose != config.HttpFetcherPurposeFeed { + t.Fatalf("expected purpose %q, got %q", config.HttpFetcherPurposeFeed, cfg.HttpFetcher.Purpose) + } +} + +func TestHTMLSourceFactorySetsHTMLPurposeByDefault(t *testing.T) { + cfg := &config.SourceConfig{ + Type: constant.SourceHTML, + HttpFetcher: &config.HttpFetcherConfig{ + URL: "https://example.com/page", + }, + HtmlParser: &config.HtmlParserConfig{ + ItemSelector: ".item", + Title: ".title", + Link: ".link", + }, + } + + _, err := htmlSourceFactory(cfg) + if err != nil { + t.Fatalf("htmlSourceFactory returned error: %v", err) + } + if cfg.HttpFetcher.Purpose != config.HttpFetcherPurposeHTML { + t.Fatalf("expected purpose %q, got %q", config.HttpFetcherPurposeHTML, cfg.HttpFetcher.Purpose) + } +} + +func 
TestJSONSourceFactorySetsHTMLPurposeByDefault(t *testing.T) { + cfg := &config.SourceConfig{ + Type: constant.SourceJSON, + HttpFetcher: &config.HttpFetcherConfig{ + URL: "https://example.com/api/items", + }, + JsonParser: &config.JsonParserConfig{ + ItemsIterator: ".items", + Title: ".title", + Link: ".link", + }, + } + + _, err := jsonSourceFactory(cfg) + if err != nil { + t.Fatalf("jsonSourceFactory returned error: %v", err) + } + if cfg.HttpFetcher.Purpose != config.HttpFetcherPurposeHTML { + t.Fatalf("expected purpose %q, got %q", config.HttpFetcherPurposeHTML, cfg.HttpFetcher.Purpose) + } +} diff --git a/internal/util/browserless.go b/internal/util/browserless.go index e755277c..06a405e7 100644 --- a/internal/util/browserless.go +++ b/internal/util/browserless.go @@ -78,7 +78,14 @@ func GetBrowserlessContent(websiteUrl string, options BrowserlessOptions) (strin } if response.StatusCode() != http.StatusOK { - return "", fmt.Errorf("browserless service returned status %d: %s", response.StatusCode(), response.String()) + respStr := response.String() + logrus.Errorf("browserless service returned status %d. URL: %s, response body: %s", response.StatusCode(), websiteUrl, respStr) + + truncLen := 200 + if len(respStr) > truncLen { + respStr = respStr[:truncLen] + "..." 
+ } + return "", fmt.Errorf("browserless service returned status %d: %s", response.StatusCode(), respStr) } return response.String(), nil diff --git a/internal/util/env_var.go b/internal/util/env_var.go index af3378a2..0b72c070 100644 --- a/internal/util/env_var.go +++ b/internal/util/env_var.go @@ -1,9 +1,32 @@ package util import ( + "strings" + "github.com/spf13/viper" ) +const ( + defaultFeedUserAgent = "FeedCraft/2.0" + htmlDefaultUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36" +) + +func DefaultFeedUserAgent() string { + value := strings.TrimSpace(GetEnvClient().GetString("HTTP_USER_AGENT_FEED")) + if value == "" { + return defaultFeedUserAgent + } + return value +} + +func DefaultHTMLUserAgent() string { + value := strings.TrimSpace(GetEnvClient().GetString("HTTP_USER_AGENT_HTML")) + if value == "" { + return htmlDefaultUserAgent + } + return value +} + func GetEnvClient() *viper.Viper { v := viper.New() diff --git a/internal/util/env_var_test.go b/internal/util/env_var_test.go new file mode 100644 index 00000000..eaa4265a --- /dev/null +++ b/internal/util/env_var_test.go @@ -0,0 +1,29 @@ +package util + +import ( + "testing" +) + +func TestDefaultFeedUserAgent(t *testing.T) { + t.Setenv("FC_HTTP_USER_AGENT_FEED", "") + if got := DefaultFeedUserAgent(); got != defaultFeedUserAgent { + t.Fatalf("expected default feed user agent %q, got %q", defaultFeedUserAgent, got) + } + + t.Setenv("FC_HTTP_USER_AGENT_FEED", "CustomFeedUA/1.0") + if got := DefaultFeedUserAgent(); got != "CustomFeedUA/1.0" { + t.Fatalf("expected custom feed user agent, got %q", got) + } +} + +func TestDefaultHTMLUserAgent(t *testing.T) { + t.Setenv("FC_HTTP_USER_AGENT_HTML", "") + if got := DefaultHTMLUserAgent(); got != htmlDefaultUserAgent { + t.Fatalf("expected default html user agent %q, got %q", htmlDefaultUserAgent, got) + } + + t.Setenv("FC_HTTP_USER_AGENT_HTML", "CustomHTMLUA/2.0") + if got := 
DefaultHTMLUserAgent(); got != "CustomHTMLUA/2.0" { + t.Fatalf("expected custom html user agent, got %q", got) + } +} diff --git a/internal/util/hash.go b/internal/util/hash.go index 32c595ff..530fbc6a 100644 --- a/internal/util/hash.go +++ b/internal/util/hash.go @@ -3,10 +3,17 @@ package util import ( "crypto/md5" "encoding/hex" + "hash/fnv" ) -func GetMD5Hash(text string) string { +func GetTextContentHash(text string) string { + h := fnv.New64a() + _, _ = h.Write([]byte(text)) + return hex.EncodeToString(h.Sum(nil)) +} + +func GetPasswordMD5Hash(text string) string { h := md5.New() - h.Write([]byte(text)) + _, _ = h.Write([]byte(text)) return hex.EncodeToString(h.Sum(nil)) } diff --git a/proposal/future/html_token_optimization.md b/proposal/future/html_token_optimization.md new file mode 100644 index 00000000..7207532f --- /dev/null +++ b/proposal/future/html_token_optimization.md @@ -0,0 +1,334 @@ +# HTML Token Optimization for LLM Input + +> 状态:规划中 + +## 1. 需求背景 + +当前 FeedCraft 已经有统一的 LLM 调用入口与内容预处理链路,但 HTML 进入 LLM 前的清洗仍然比较粗糙。很多页面会带上大量对 RSS 场景无价值、但会显著消耗 token 的内容,例如: + +- `script` / `style` / `noscript` 等无关节点 +- `class` / `style` / `id` / `aria-*` / `data-*` 等低价值属性 +- 很长的 `href` / `src` / `srcset` +- base64 `data:` 图片 +- 过多空白、缩进与样式噪音 + +这些内容会带来两个问题: + +1. **增加 LLM 成本**:同样的正文语义会占用更多 token。 +2. **干扰模型理解**:无关 HTML 噪音会稀释正文、图片、链接等真正重要的信息。 + +因此需要在现有架构中增加一层简洁、稳定、可配置的 HTML 优化逻辑,在不破坏主要语义的前提下,尽量缩小送入 LLM 的内容体积。 + +## 2. 目标 + +本方案希望实现一套面向 LLM 输入的 HTML 优化机制,满足以下目标: + +- 尽量复用现有 `ProcessContent` 预处理入口,不重新发明新链路 +- 通过 DOM 级处理删除无意义 HTML 内容,而不是只靠正则替换 +- 支持不同 Craft 使用不同优化等级 +- 保持配置模型足够小,但能表达“保留多少原始内容”这种关键差异 +- 让 tag / attr 规则集中定义,方便后续维护 +- 为 placeholder 替换与恢复提供简单、局部的机制 +- 补充单独测试,确保优化结果稳定、可预期 + +## 3. 
适用场景与等级差异 + +不同 LLM 场景对 HTML 保真度的要求并不一样,因此这里不适合只用一个 `bool` 开关。 + +### 3.1 更激进的场景 + +例如: + +- summary +- llm filter +- 条件判断类 craft + +这类场景重点是提取正文语义或做分类判断,不需要保留太多原始结构。对于它们,可以更激进地: + +- 删除更多无关节点 +- 移除大部分低价值属性 +- 对链接和图片做更强压缩或直接去除 + +### 3.2 中等保留的场景 + +例如: + +- 常规 translate +- beautify + +这些场景仍然希望模型理解原文结构,且输出内容最好保留链接、图片等信息,因此应该: + +- 保留主要结构标签 +- 保留关键资源属性 +- 仅去掉明显无意义的噪音 +- 仅替换超长 URL / `data:` URI + +### 3.3 最保守的场景 + +例如: + +- immersive-translate + +这种场景强调尽量保留原始格式、链接、图片以及更多内容组织形式,因此优化应该尽量保守,只做: + +- 明确无意义节点移除 +- 低价值属性清理 +- 极端长字段压缩 +- 空白压缩 + +## 4. 设计原则 + +### 4.1 单一入口 + +优先把 HTML 优化纳入现有的 `internal/util/content_processor.go`,作为 `ProcessContent` 的一部分,这样: + +- 大部分 LLM 调用链无需额外重构 +- 现有 `RemoveLinks` / `RemoveImage` / `ConvertToMd` 能继续复用 +- 逻辑集中,后续更容易维护 + +### 4.2 配置驱动,而不是 craft 分支硬编码 + +不希望在代码里到处写: + +- `if summary { ... }` +- `if immersiveTranslate { ... }` + +更合适的方式是: + +- `ContentProcessOption` 增加一个 HTML optimize config struct +- caller 只声明“我想要哪种保留等级” +- 具体 tag/attr/placeholder 规则由 optimizer 内部统一决定 + +### 4.3 规则集中、可维护 + +attr 和 tag 规则应该集中定义,不应散落在 DOM 遍历逻辑里。后续新增规则时,最好只修改一处规则表或 helper。 + +### 4.4 保持 v1 简洁 + +这次优化是为了减少 token,不是为了做完整 HTML sanitizer,也不是为了构建一个高度可配置的通用清洗框架。v1 只处理最有收益、最确定的部分。 + +## 5. 推荐的数据结构 + +建议在 `internal/util/` 中增加一个小型配置模型,例如: + +- `ContentProcessOption` 增加 `OptimizeHTML *HTMLOptimizeConfig` +- `HTMLOptimizeConfig` 只表达少数关键维度,例如: + - 优化等级 / preservation profile + - 是否保留链接 + - 是否保留图片 + - 长 URL / `data:` URI 的替换阈值(可内部默认) + +更推荐的做法是: + +- 对外暴露少量 profile 或 level +- 内部再映射成实际规则集 + +这样可以同时满足: + +- 外部调用简洁 +- 内部实现可演进 +- 不会让调用方承担太多细节决策 + +## 6. HTML 优化的核心步骤 + +建议优化器按以下顺序工作: + +1. 判断输入是否像 HTML;非 HTML 内容直接跳过 +2. 用 DOM 解析 HTML +3. 删除无意义节点 +4. 清理低价值属性 +5. 根据配置决定是否保留图片、链接等结构 +6. 对超长属性值做 placeholder 替换 +7. 序列化 HTML +8. 
压缩空白 + +### 6.1 建议直接删除的节点 + +v1 可先覆盖这些明显无价值的元素: + +- `script` +- `style` +- `noscript` +- `template` +- `iframe` + +这些元素通常不会帮助 RSS 抽取、摘要、翻译或分类,保留它们只会浪费 token。 + +### 6.2 建议清理的属性 + +低价值属性建议集中通过规则清理,例如: + +- `class` +- `id` +- `style` +- `aria-*` +- `data-*` +- `on*` 事件属性 + +同时保留可能有语义价值的属性,具体是否保留也可受 profile 影响,例如: + +- `href` +- `src` +- `srcset` +- `alt` +- `title` + +### 6.3 超长字段替换 + +对以下字段做可逆 placeholder 压缩: + +- 很长的 `href` +- 很长的 `src` +- 很长的 `srcset` +- base64 `data:` URI + +例如替换为: + +- `__FC_PH_URL_0001__` +- `__FC_PH_DATA_0002__` + +并维护 request-scoped map。这样可以避免: + +- 把极长字符串直接发给 LLM +- 使用全局共享状态 +- 后续恢复时依赖不透明上下文 + +## 7. placeholder 恢复策略 + +v1 不需要把恢复逻辑强行塞进所有 LLM 流程里。 + +推荐做法: + +- 优化器返回优化后的 HTML 和 placeholder map +- 只有确实需要恢复的流程才使用 restore helper +- placeholder map 生命周期限定在单次调用上下文内 + +这样能保持实现足够简单,同时给像 `beautify` 这种对原始 URL 保真度更敏感的流程留下扩展空间。 + +## 8. 与现有代码的集成点 + +### 8.1 统一入口 + +优先复用: + +- `internal/adapter/llm.go` +- `internal/util/content_processor.go` + +也就是继续通过 `ProcessContent` 统一处理大部分 LLM 输入。 + +### 8.2 主要调用方 + +后续实现时,至少应考虑这些现有路径: + +- `internal/craft/common_llm_logic.go` + + - 使用更激进的 profile + - 适合 filter / condition / summary 一类语义型任务 + +- `internal/craft/translate.go` + + - 使用更保守的 profile + - 常规翻译需要保留更多结构与资源 + +- immersive translate + + - 使用最保守的 profile + - 尽量保留链接、图片和格式 + +- `internal/craft/beautify.go` + - 当前会直接把原始 HTML 拼进 prompt + - 需要在 prompt 构造前引入同一套优化逻辑 + +## 9. 规则维护方式 + +为了让代码保持简洁、清晰、优雅,建议使用以下形式管理规则: + +- 一组集中定义的 removable tags +- 一组集中定义的 removable attrs +- 一组 prefix-based removable attr rules +- 一组 profile-aware preserved attrs + +DOM 遍历时只调用这些 helper,不在遍历逻辑里堆积大量 if/else。 + +这样做的优点: + +- 可读性更好 +- 修改规则时影响范围小 +- 容易为不同 profile 扩展行为 +- 更容易测试每条规则的预期 + +## 10. v1 明确不做的事情 + +为了保持范围收敛,这个方案暂时不处理: + +- 完整 HTML 安全清洗 +- 复杂 CSS 可见性推断 +- 语义重排或正文重写 +- 过度细粒度的用户可配置规则 +- 所有 URL 一律替换 +- 对所有 LLM 返回结果做全局 placeholder 自动恢复 + +## 11. 
测试策略 + +这部分必须单独补充测试,不能只依赖集成路径顺带覆盖。 + +### 11.1 独立 optimizer 单测 + +建议增加单独测试文件,例如: + +- `internal/util/content_processor_test.go` +- 或 `internal/util/html_optimize_test.go` + +重点验证: + +- 无意义节点是否被移除 +- 低价值属性是否被清理 +- 关键属性是否按 profile 保留 +- whitespace 是否被正确压缩 +- 长 URL / `data:` URI 是否被替换为 placeholder +- placeholder 恢复是否正确 +- 非 HTML 输入是否安全透传 + +### 11.2 profile 差异测试 + +需要专门验证不同优化等级的行为差异,而不是只测单一输出。 + +例如: + +- aggressive profile 是否删除更多噪音 +- preserve profile 是否保留图片和链接 +- immersive profile 是否比 preserve profile 保留更多原始结构 + +### 11.3 `ProcessContent` 组合测试 + +继续验证它和现有逻辑的组合行为: + +- optimize + `ConvertToMd` +- optimize + `RemoveImage` +- optimize + `RemoveLinks` +- 多个步骤叠加时顺序是否稳定 + +### 11.4 调用路径 smoke test + +对主要 craft 调用路径补充轻量 smoke coverage,确保它们确实选用了正确 profile。 + +## 12. 预期收益 + +如果实现得当,这套机制会带来以下收益: + +- 减少 LLM 输入 token 消耗 +- 提高 prompt 中有效语义密度 +- 让 summary / filter 等场景更稳定 +- 让 translate / immersive-translate 在保真和成本之间更可控 +- 为后续更细致的 HTML 内容优化打下可扩展基础 + +## 13. 总结 + +这项工作的关键不在于“删得越多越好”,而在于: + +- 用统一入口做预处理 +- 用小而清晰的 config 表达不同保留等级 +- 用集中规则保持 tag / attr 逻辑可维护 +- 用独立测试确保优化结果稳定 + +如果这几个点把握好,FeedCraft 就能在不明显增加架构复杂度的前提下,让 HTML 进入 LLM 前变得更轻、更干净、更适配不同 craft 的需求。 diff --git a/proposal/inbox_source_design.md b/proposal/inbox_source_design.md index 37505940..2381ba87 100644 --- a/proposal/inbox_source_design.md +++ b/proposal/inbox_source_design.md @@ -61,6 +61,12 @@ type InboxItem struct { } ``` +其中: + +- `Content` 存储文章正文内容 +- `URL` 优先保存调用方提供的原文链接;若 POST 写入时该字段为空,则服务端自动回填为 FeedCraft 内部内容访问地址:`/:inbox_id/:article_id` +- 上述路由中的 `article_id` 对应 `InboxItem.ItemID` + `(InboxID, ItemID)` 建联合唯一索引,用于去重(见 5.6)。 ### 4.3 InboxToken @@ -109,41 +115,44 @@ Header: Authorization: Bearer [{ "title": "hello" }] ``` +如果 `url` 为空或省略,服务端会在写入时自动回填:`/:inbox_id/:article_id`。 + ### 5.3 字段说明 -| 字段 | 类型 | 必填 | 说明 | -| ----------- | ------ | ------ | ----------------------------------------------------------- | -| `title` | string | **是** | 标题 | -| `url` | string | 否 | 原文链接 | -| `content` | string | 否 | 正文内容(支持 HTML) | 
-| `summary` | string | 否 | 摘要文本,不填则自动截取 content 前 200 字符 | -| `id` | string | 否 | 唯一标识,用于去重。不填则服务端自动生成 UUID(不参与去重) | -| `author` | string | 否 | 作者名 | -| `timestamp` | number | 否 | Unix 秒级时间戳,不填则使用服务端当前时间 | +| 字段 | 类型 | 必填 | 说明 | +| ----------- | ------ | ------ | ----------------------------------------------------------------- | +| `title` | string | **是** | 标题 | +| `url` | string | 否 | 原文链接;若为空或省略,则服务端自动填充为 Inbox 文章内容访问地址 | +| `content` | string | 否 | 正文内容(支持 HTML) | +| `summary` | string | 否 | 摘要文本,不填则自动截取 content 前 200 字符 | +| `id` | string | 否 | 唯一标识,用于去重。不填则服务端自动生成 UUID(不参与去重) | +| `author` | string | 否 | 作者名 | +| `timestamp` | number | 否 | Unix 秒级时间戳,不填则使用服务端当前时间 | ### 5.4 POST 请求体与 DB 模型的映射 -| POST 字段 | DB 字段 (InboxItem) | 默认值逻辑 | -| ------------ | ------------------- | --------------------------------------------- | -| _(URL 路径)_ | `InboxID` | 从 URL 路径 `/api/inbox/:inbox_id/items` 提取 | -| `title` | `Title` | 直接存储 | -| `url` | `URL` | 直接存储 | -| `content` | `Content` | 直接存储 | -| `summary` | `Summary` | 不填则截取 Content 前 200 字符 | -| `id` | `ItemID` | 不填则服务端生成 UUID | -| `author` | `Author` | 直接存储 | -| `timestamp` | `PublishedAt` | 不填则 = `CreatedAt` | -| _(无)_ | `ID` | DB 自增主键 | -| _(无)_ | `CreatedAt` | GORM 自动填充为入库时间 | +| POST 字段 | DB 字段 (InboxItem) | 默认值逻辑 | +| ------------ | ------------------- | ------------------------------------------------------------------------------------ | +| _(URL 路径)_ | `InboxID` | 从 URL 路径 `/api/inbox/:inbox_id/items` 提取 | +| `title` | `Title` | 直接存储 | +| `url` | `URL` | 优先使用请求值;若为空则自动填充为 `/:inbox_id/:article_id` | +| `content` | `Content` | 直接存储 | +| `summary` | `Summary` | 不填则截取 Content 前 200 字符 | +| `id` | `ItemID` | 不填则服务端生成 UUID | +| `author` | `Author` | 直接存储 | +| `timestamp` | `PublishedAt` | 不填则 = `CreatedAt` | +| _(无)_ | `ID` | DB 自增主键 | +| _(无)_ | `CreatedAt` | GORM 自动填充为入库时间 | ### 5.5 写入行为 1. 验证 token 有效性(查 `inbox_tokens` 表) 2. 验证 `inbox_id` 对应的 inbox 存在 3. 校验所有条目(每条必须有 `title`,总数不超过 100)。任一条校验失败则整批拒绝,返回 400 -4. 
填充默认值(`id` → UUID, `timestamp` → 当前时间, `summary` → 截取 content) -5. 批量 upsert `InboxItem` -6. 滚动清理:查询该 inbox 当前总条数,若超过 `max_items`,删除最旧的记录使总数 = `max_items` +4. 填充默认值(`id` → UUID, `timestamp` → 当前时间, `summary` → 截取 content 前 200 字符) +5. 若 `url` 为空或省略,则服务端根据 `/:inbox_id/:article_id` 自动生成内容访问地址,其中 `article_id = ItemID` +6. 批量 upsert `InboxItem` +7. 滚动清理:查询该 inbox 当前总条数,若超过 `max_items`,删除最旧的记录使总数 = `max_items` ### 5.6 去重机制 @@ -287,7 +296,15 @@ Inbox 可以通过 `feedcraft://recipe/:id` 间接被 Topic 引用:先建一 | -------- | ---------------------------- | -------------------------------------- | | **POST** | `/api/inbox/:inbox_id/items` | 批量写入数据(JSON 数组,上限 100 条) | -### 9.2 管理后台(JWT 鉴权) +### 9.2 内容读取(公开访问) + +| 方法 | 路径 | 说明 | +| ------- | ------------------------------------------------- | --------------------------------- | +| **GET** | `/:inbox_id/:article_id` | 返回对应文章的 `content` 字段内容 | + +该路由中的 `article_id` 对应 `InboxItem.ItemID`。如果某条数据在 POST 写入时未提供 `url`,系统会自动将 `url` 回填为这个内容访问地址。 + +### 9.3 管理后台(JWT 鉴权) | 方法 | 路径 | 说明 | | ------ | ----------------------------- | ------------------ | diff --git a/web/admin/index.html b/web/admin/index.html index 164b2b7b..efd98079 100644 --- a/web/admin/index.html +++ b/web/admin/index.html @@ -2,11 +2,7 @@ - + FeedCraft Admin Panel diff --git a/web/admin/package-lock.json b/web/admin/package-lock.json index 321ae8a0..9cf1986b 100644 --- a/web/admin/package-lock.json +++ b/web/admin/package-lock.json @@ -17,6 +17,7 @@ "echarts": "^5.5.1", "events": "^3.3.0", "js-md5": "^0.8.3", + "limax": "^4.2.2", "lodash": "^4.17.21", "mitt": "^3.0.0", "nprogress": "^0.2.0", @@ -110,7 +111,6 @@ "resolved": "https://registry.npmjs.org/@arco-design/web-vue/-/web-vue-2.57.0.tgz", "integrity": "sha512-R5YReC3C2sG3Jv0+YuR3B7kzkq2KdhhQNCGXD8T11xAoa0zMt6SWTP1xJQOdZcM9du+q3z6tk5mRvh4qkieRJw==", "license": "MIT", - "peer": true, "dependencies": { "@arco-design/color": "^0.4.0", "b-tween": "^0.3.3", @@ -182,7 +182,6 @@ "integrity": 
"sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -1131,7 +1130,6 @@ "integrity": "sha512-VlJEV0fOQ7BExOsHYAGrgbEiZoi8D+Bl2+f6V2RrXerRSylnp+ZBHmPvaIa8cz0Ajx7WO7Z5RqfgYg7ED1nRhA==", "dev": true, "license": "BSD-2-Clause", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "5.62.0", "@typescript-eslint/types": "5.62.0", @@ -1802,7 +1800,6 @@ "resolved": "https://registry.npmjs.org/@vue/runtime-core/-/runtime-core-3.5.25.tgz", "integrity": "sha512-Z751v203YWwYzy460bzsYQISDfPjHTl+6Zzwo/a3CsAf+0ccEjQ8c+0CdX1WsumRTHeywvyUFtW6KvNukT/smA==", "license": "MIT", - "peer": true, "dependencies": { "@vue/reactivity": "3.5.25", "@vue/shared": "3.5.25" @@ -1881,7 +1878,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2319,7 +2315,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.25", "caniuse-lite": "^1.0.30001754", @@ -3196,7 +3191,6 @@ "resolved": "https://registry.npmjs.org/echarts/-/echarts-5.6.0.tgz", "integrity": "sha512-oTbVTsXfKuEhxftHqL5xprgLoc0k7uScAwtryCgWF6hPYFLRwOUHiFmHGCBKP5NPFNkDVopOieyUqYGH8Fa3kA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "tslib": "2.3.0", "zrender": "5.6.1" @@ -3805,7 +3799,6 @@ "deprecated": "This version is no longer supported. 
Please see https://eslint.org/version-support for other options.", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -3990,7 +3983,6 @@ "integrity": "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", @@ -4882,6 +4874,15 @@ "he": "bin/he" } }, + "node_modules/hepburn": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/hepburn/-/hepburn-1.2.2.tgz", + "integrity": "sha512-DeykBc4XmfAWsnN+Y1Svi9uaQnnz21Q/ARuGWvIBxP1iUFeMIWL41DfVkgTh7tU23LFIbmIBO2Bk17BTPu0kVA==", + "license": "Apache-2.0", + "engines": { + "node": ">=4" + } + }, "node_modules/htmlparser2": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", @@ -5524,7 +5525,6 @@ "integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==", "dev": true, "license": "MIT", - "peer": true, "bin": { "jiti": "bin/jiti.js" } @@ -5618,7 +5618,6 @@ "integrity": "sha512-j1n1IuTX1VQjIy3tT7cyGbX7nvQOsFLoIqobZv4ttI5axP923gA44zUj6miiA6R5Aoms4sEGVIIcucXUbRI14g==", "dev": true, "license": "Apache-2.0", - "peer": true, "dependencies": { "copy-anything": "^2.0.1", "parse-node-version": "^1.0.1", @@ -5664,6 +5663,20 @@ "node": ">=10" } }, + "node_modules/limax": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/limax/-/limax-4.2.3.tgz", + "integrity": "sha512-KJ41cc23/i9rGgFjpLpz4LVFuVHIVUoQX3zJs8qCY+Eo1KReIPD8QTahSErdWuO1Zt7CyjuLdo4vtsH60qau8Q==", + "license": "Apache-2.0", + "dependencies": { + "hepburn": "^1.2.2", + "pinyin-pro": "^3.27.0", + "speakingurl": "^14.0.1" + }, + "engines": { + "node": ">=14" + } + }, "node_modules/lines-and-columns": { "version": "1.2.4", "resolved": 
"https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", @@ -6609,6 +6622,12 @@ } } }, + "node_modules/pinyin-pro": { + "version": "3.28.1", + "resolved": "https://registry.npmjs.org/pinyin-pro/-/pinyin-pro-3.28.1.tgz", + "integrity": "sha512-oqz8ulwRgtUXRi0vbqEfGNly19zpyCxYrjhkk5TibGcgSW6eNwS5woajCXRwqURi8Ehc2yOFTiB4uNoZ+NJOnA==", + "license": "MIT" + }, "node_modules/pirates": { "version": "4.0.7", "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.7.tgz", @@ -6662,7 +6681,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", @@ -6819,7 +6837,6 @@ "integrity": "sha512-tdN8qQGvNjw4CHbY+XXk0JgCXn9QiF21a55rBe5LJAU+kDyC4WQn4+awm2Xfk2lQMk5fKup9XgzTZtGkjBdP9Q==", "dev": true, "license": "MIT", - "peer": true, "bin": { "prettier": "bin-prettier.js" }, @@ -7129,7 +7146,6 @@ "integrity": "sha512-fS6iqSPZDs3dr/y7Od6y5nha8dW1YnbgtsyotCVvoFGKbERG++CVRFv1meyGDE1SNItQA8BrnCw7ScdAhRJ3XQ==", "dev": true, "license": "MIT", - "peer": true, "bin": { "rollup": "dist/bin/rollup" }, @@ -7557,6 +7573,15 @@ "node": ">=0.10.0" } }, + "node_modules/speakingurl": { + "version": "14.0.1", + "resolved": "https://registry.npmjs.org/speakingurl/-/speakingurl-14.0.1.tgz", + "integrity": "sha512-1POYv7uv2gXoyGFpBCmpDVSNV74IfsWlDW216UPjbWufNf+bSU6GdbDsxdcxtfwb4xlI3yxzOTKClUosxARYrQ==", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/split-on-first": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/split-on-first/-/split-on-first-3.0.0.tgz", @@ -8019,7 +8044,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -8212,7 +8236,6 @@ "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": 
"bin/tsc", "tsserver": "bin/tsserver" @@ -8407,7 +8430,6 @@ "integrity": "sha512-K/jGKL/PgbIgKCiJo5QbASQhFiV02X9Jh+Qq0AKCRCRKZtOTVi4t6wh75FDpGf2N9rYOnzH87OEFQNaFy6pdxQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.15.9", "postcss": "^8.4.18", @@ -8508,7 +8530,6 @@ "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.25.tgz", "integrity": "sha512-YLVdgv2K13WJ6n+kD5owehKtEXwdwXuj2TTyJMsO7pSeKw2bfRNZGjhB7YzrpbMYj5b5QsUebHpOqR3R3ziy/g==", "license": "MIT", - "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.25", "@vue/compiler-sfc": "3.5.25", diff --git a/web/admin/src/api/feed_viewer.ts b/web/admin/src/api/feed_viewer.ts new file mode 100644 index 00000000..937df612 --- /dev/null +++ b/web/admin/src/api/feed_viewer.ts @@ -0,0 +1,45 @@ +import axios from 'axios'; +import { APIResponse } from '@/api/types'; + +export interface FeedViewerPreviewImage { + url: string; + title: string; +} + +export interface FeedViewerPreviewItem { + guid: string; + title: string; + link: string; + pubDate: string; + isoDate: string; + content: string; + contentSnippet: string; +} + +export interface FeedViewerPreview { + title: string; + description: string; + link: string; + feedUrl: string; + copyright: string; + image?: FeedViewerPreviewImage; + items: FeedViewerPreviewItem[]; +} + +export interface PreviewFeedOptions { + craftName?: string; +} + +export function previewFeed( + inputUrl: string, + options: PreviewFeedOptions = {} +): Promise> { + return axios + .get>('/api/admin/tools/feed/preview', { + params: { + input_url: inputUrl, + craft_name: options.craftName, + }, + }) + .then((res) => res.data); +} diff --git a/web/admin/src/locale/en-US.ts b/web/admin/src/locale/en-US.ts index 325042be..391cd88b 100644 --- a/web/admin/src/locale/en-US.ts +++ b/web/admin/src/locale/en-US.ts @@ -13,7 +13,7 @@ import localeFeedViewer from '@/locale/en-US/feedViewer'; import localeLlmDebug from '@/locale/en-US/llmDebug'; import localeUrlGenerator 
from '@/locale/en-US/urlGenerator'; import localeHtmlToRss from '@/locale/en-US/htmlToRss'; -import localeCurlToRss from '@/locale/en-US/curlToRss'; +import localeJsonToRss from '@/locale/en-US/jsonToRss'; import localeSearchToRss from '@/locale/en-US/searchToRss'; import localeDependencyService from '@/locale/en-US/dependencyService'; import localeHealth from '@/locale/en-US/health'; @@ -34,7 +34,7 @@ export default { ...localeLlmDebug, ...localeUrlGenerator, ...localeHtmlToRss, - ...localeCurlToRss, + ...localeJsonToRss, ...localeSearchToRss, ...localeDependencyService, ...localeHealth, diff --git a/web/admin/src/locale/en-US/curlToRss.ts b/web/admin/src/locale/en-US/curlToRss.ts deleted file mode 100644 index 12d0d0de..00000000 --- a/web/admin/src/locale/en-US/curlToRss.ts +++ /dev/null @@ -1,104 +0,0 @@ -export default { - 'curlToRss.title': 'Curl to RSS', - 'curlToRss.description': - 'Generate RSS feeds from any JSON API with field extraction rules and optional templates.', - 'curlToRss.step.requestConfig': 'Request Config', - 'curlToRss.step.requestConfig.desc': 'Configure JSON source', - 'curlToRss.step.parsingRules': 'Parsing Rules', - 'curlToRss.step.parsingRules.desc': - 'Define field extraction and output templates', - 'curlToRss.step.feedMetadata': 'Feed Metadata', - 'curlToRss.step.feedMetadata.desc': 'Set feed details', - 'curlToRss.step.saveRecipe': 'Save Recipe', - 'curlToRss.step.saveRecipe.desc': 'Save as Custom Recipe', - 'curlToRss.step1.alert': - 'Configure the HTTP request to fetch the JSON data. 
You can import from a cURL command.', - 'curlToRss.step1.curlCommand': - 'Curl Command (Optional - Paste here and click Import)', - 'curlToRss.step1.import': 'Import', - 'curlToRss.step1.method': 'Method', - 'curlToRss.step1.url': 'URL', - 'curlToRss.step1.headers': 'Headers', - 'curlToRss.step1.requestBody': 'Request Body', - 'curlToRss.step1.add': 'Add', - 'curlToRss.step1.fetchAndNext': 'Fetch & Next', - 'curlToRss.step2.responseJson': 'Response JSON', - 'curlToRss.step2.alert': - 'Use jq selectors to extract raw values, then optionally use templates to build the final field output. The items selector supports .data.items and .data.items[].', - 'curlToRss.step2.iteration': 'Iteration', - 'curlToRss.step2.itemsIterator': - 'Items Iterator (e.g. .data.items or .items[])', - 'curlToRss.step2.itemFields': 'Item Fields', - 'curlToRss.step2.titleSelector': 'Title Selector', - 'curlToRss.step2.titleTemplate': 'Title Template (Optional)', - 'curlToRss.step2.linkSelector': 'Link Selector', - 'curlToRss.step2.linkTemplate': 'Link Template (Optional)', - 'curlToRss.step2.dateSelector': 'Date Selector', - 'curlToRss.step2.dateTemplate': 'Date Template (Optional)', - 'curlToRss.step2.contentSelector': 'Content/Summary Selector', - 'curlToRss.step2.contentTemplate': 'Content/Summary Template (Optional)', - 'curlToRss.step2.templateHelpTitle': 'Template Help', - 'curlToRss.step2.templateHelpDesc': - 'Selectors extract raw values from the current item. 
Templates let you combine, clean up, or fill in the final text using the current item and extracted fields.', - 'curlToRss.step2.templateExamples': - 'Examples:\nBuild a link: https://some-website.com/article/{{ .Item.id }}\nTrim a title: {{ .Fields.Title | trimSpace }}\nFallback summary: {{ default .Fields.Description "No summary" }}', - 'curlToRss.step2.previewResults': 'Preview Results ({count})', - 'curlToRss.step2.previewPlaceholder': 'Preview results will appear here', - 'curlToRss.step2.previewPlaceholder.help': - 'Configure selectors above and click "Run Preview"', - 'curlToRss.step2.runPreview': 'Run Preview', - 'curlToRss.step2.nextStep': 'Next Step', - 'curlToRss.step3.alert': - 'Successfully extracted {count} items! Now configure the feed metadata.', - 'curlToRss.step3.feedTitle': 'Feed Title', - 'curlToRss.step3.feedDescription': 'Feed Description', - 'curlToRss.step3.siteLink': 'Site Link', - 'curlToRss.step3.authorName': 'Author Name', - 'curlToRss.step3.authorEmail': 'Author Email', - 'curlToRss.step4.reviewAndSave': 'Review & Save', - 'curlToRss.step4.summary': 'Summary', - 'curlToRss.step4.sourceUrl': 'Source URL', - 'curlToRss.step4.itemCount': 'Item Count', - 'curlToRss.step4.recipeId': 'Recipe ID (URL Path)', - 'curlToRss.step4.recipeId.help': - 'This will be the unique identifier in the URL.', - 'curlToRss.step4.internalDescription': 'Internal Description', - 'curlToRss.step4.confirmAndSave': 'Confirm & Save Recipe', - 'curlToRss.common.back': 'Back', - 'curlToRss.common.next': 'Next', - 'curlToRss.msg.enterCurl': 'Please enter a curl command', - 'curlToRss.msg.curlParsed': 'Curl parsed successfully', - 'curlToRss.msg.urlRequired': 'URL is required', - 'curlToRss.msg.fetched': 'Fetched successfully', - 'curlToRss.msg.invalidJson': 'Response is not a valid JSON', - 'curlToRss.msg.emptyResponse': 'Empty response', - 'curlToRss.msg.iteratorRequired': 'Items Iterator selector is required', - 'curlToRss.msg.titleRequired': 'Title selector is 
required', - 'curlToRss.msg.noItems': 'No items found with current selectors', - 'curlToRss.msg.parsedItems': 'Parsed {count} items', - 'curlToRss.msg.feedTitleRequired': 'Feed Title is required', - 'curlToRss.msg.recipeIdRequired': 'Recipe ID is required', - 'curlToRss.msg.saved': 'Recipe saved successfully!', - 'curlToRss.msg.saveFailed': 'Failed to save: {msg}', - 'curlToRss.placeholder.key': 'Key', - 'curlToRss.placeholder.value': 'Value', - 'curlToRss.placeholder.feedTitle': 'My Awesome Feed', - 'curlToRss.placeholder.feedDesc': 'A description of this feed', - 'curlToRss.placeholder.siteLink': 'https://example.com', - 'curlToRss.placeholder.recipeId': 'my-json-feed', - 'curlToRss.placeholder.internalDesc': 'Notes for yourself about this recipe', - 'curlToRss.placeholder.url': 'https://api.example.com/v1/posts', - 'curlToRss.placeholder.body': "{ 'foo': 'bar' }", - 'curlToRss.placeholder.curl': 'curl -X POST ...', - 'curlToRss.placeholder.items': '.items', - 'curlToRss.placeholder.title': '.title', - 'curlToRss.placeholder.titleTemplate': '{{ .Fields.Title | trimSpace }}', - 'curlToRss.placeholder.link': '.url', - 'curlToRss.placeholder.linkTemplate': - 'https://some-website.com/article/{{ .Item.id }}', - 'curlToRss.placeholder.date': '.created_at', - 'curlToRss.placeholder.dateTemplate': '{{ .Fields.Date }}', - 'curlToRss.placeholder.content': '.content', - 'curlToRss.placeholder.contentTemplate': - '{{ default .Fields.Description "No summary" }}', -}; diff --git a/web/admin/src/locale/en-US/health.ts b/web/admin/src/locale/en-US/health.ts index 91bdb570..6334276c 100644 --- a/web/admin/src/locale/en-US/health.ts +++ b/web/admin/src/locale/en-US/health.ts @@ -6,4 +6,5 @@ export default { 'health.missing': 'Missing', 'health.noData': 'No analysis data. 
Click Analyze to start.', 'health.fetchError': 'Failed to fetch dependency health data', + 'health.missingCrafts': 'Missing Crafts', }; diff --git a/web/admin/src/locale/en-US/jsonToRss.ts b/web/admin/src/locale/en-US/jsonToRss.ts new file mode 100644 index 00000000..633e0546 --- /dev/null +++ b/web/admin/src/locale/en-US/jsonToRss.ts @@ -0,0 +1,104 @@ +export default { + 'jsonToRss.title': 'JSON to RSS', + 'jsonToRss.description': + 'Generate RSS feeds from any JSON API with field extraction rules and optional templates.', + 'jsonToRss.step.requestConfig': 'Request Config', + 'jsonToRss.step.requestConfig.desc': 'Configure JSON source', + 'jsonToRss.step.parsingRules': 'Parsing Rules', + 'jsonToRss.step.parsingRules.desc': + 'Define field extraction and output templates', + 'jsonToRss.step.feedMetadata': 'Feed Metadata', + 'jsonToRss.step.feedMetadata.desc': 'Set feed details', + 'jsonToRss.step.saveRecipe': 'Save Recipe', + 'jsonToRss.step.saveRecipe.desc': 'Save as Custom Recipe', + 'jsonToRss.step1.alert': + 'Configure the HTTP request to fetch the JSON data. You can import from a cURL command.', + 'jsonToRss.step1.curlCommand': + 'cURL Command (Optional - Paste here and click Import)', + 'jsonToRss.step1.import': 'Import', + 'jsonToRss.step1.method': 'Method', + 'jsonToRss.step1.url': 'URL', + 'jsonToRss.step1.headers': 'Headers', + 'jsonToRss.step1.requestBody': 'Request Body', + 'jsonToRss.step1.add': 'Add', + 'jsonToRss.step1.fetchAndNext': 'Fetch & Next', + 'jsonToRss.step2.responseJson': 'Response JSON', + 'jsonToRss.step2.alert': + 'Use jq selectors to extract raw values, then optionally use templates to build the final field output. The items selector supports .data.items and .data.items[].', + 'jsonToRss.step2.iteration': 'Iteration', + 'jsonToRss.step2.itemsIterator': + 'Items Iterator (e.g. 
.data.items or .items[])', + 'jsonToRss.step2.itemFields': 'Item Fields', + 'jsonToRss.step2.titleSelector': 'Title Selector', + 'jsonToRss.step2.titleTemplate': 'Title Template (Optional)', + 'jsonToRss.step2.linkSelector': 'Link Selector', + 'jsonToRss.step2.linkTemplate': 'Link Template (Optional)', + 'jsonToRss.step2.dateSelector': 'Date Selector', + 'jsonToRss.step2.dateTemplate': 'Date Template (Optional)', + 'jsonToRss.step2.contentSelector': 'Content/Summary Selector', + 'jsonToRss.step2.contentTemplate': 'Content/Summary Template (Optional)', + 'jsonToRss.step2.templateHelpTitle': 'Template Help', + 'jsonToRss.step2.templateHelpDesc': + 'Selectors extract raw values from the current item. Templates let you combine, clean up, or fill in the final text using the current item and extracted fields.', + 'jsonToRss.step2.templateExamples': + 'Examples:\nBuild a link: https://some-website.com/article/{{ .Item.id }}\nTrim a title: {{ .Fields.Title | trimSpace }}\nFallback summary: {{ default .Fields.Description "No summary" }}', + 'jsonToRss.step2.previewResults': 'Preview Results ({count})', + 'jsonToRss.step2.previewPlaceholder': 'Preview results will appear here', + 'jsonToRss.step2.previewPlaceholder.help': + 'Configure selectors above and click "Run Preview"', + 'jsonToRss.step2.runPreview': 'Run Preview', + 'jsonToRss.step2.nextStep': 'Next Step', + 'jsonToRss.step3.alert': + 'Successfully extracted {count} items! 
Now configure the feed metadata.', + 'jsonToRss.step3.feedTitle': 'Feed Title', + 'jsonToRss.step3.feedDescription': 'Feed Description', + 'jsonToRss.step3.siteLink': 'Site Link', + 'jsonToRss.step3.authorName': 'Author Name', + 'jsonToRss.step3.authorEmail': 'Author Email', + 'jsonToRss.step4.reviewAndSave': 'Review & Save', + 'jsonToRss.step4.summary': 'Summary', + 'jsonToRss.step4.sourceUrl': 'Source URL', + 'jsonToRss.step4.itemCount': 'Item Count', + 'jsonToRss.step4.recipeId': 'Recipe ID (URL Path)', + 'jsonToRss.step4.recipeId.help': + 'This will be the unique identifier in the URL.', + 'jsonToRss.step4.internalDescription': 'Internal Description', + 'jsonToRss.step4.confirmAndSave': 'Confirm & Save Recipe', + 'jsonToRss.common.back': 'Back', + 'jsonToRss.common.next': 'Next', + 'jsonToRss.msg.enterCurl': 'Please enter a cURL command', + 'jsonToRss.msg.curlParsed': 'cURL parsed successfully', + 'jsonToRss.msg.urlRequired': 'URL is required', + 'jsonToRss.msg.fetched': 'Fetched successfully', + 'jsonToRss.msg.invalidJson': 'Response is not a valid JSON', + 'jsonToRss.msg.emptyResponse': 'Empty response', + 'jsonToRss.msg.iteratorRequired': 'Items Iterator selector is required', + 'jsonToRss.msg.titleRequired': 'Title selector is required', + 'jsonToRss.msg.noItems': 'No items found with current selectors', + 'jsonToRss.msg.parsedItems': 'Parsed {count} items', + 'jsonToRss.msg.feedTitleRequired': 'Feed Title is required', + 'jsonToRss.msg.recipeIdRequired': 'Recipe ID is required', + 'jsonToRss.msg.saved': 'Recipe saved successfully!', + 'jsonToRss.msg.saveFailed': 'Failed to save: {msg}', + 'jsonToRss.placeholder.key': 'Key', + 'jsonToRss.placeholder.value': 'Value', + 'jsonToRss.placeholder.feedTitle': 'My Awesome Feed', + 'jsonToRss.placeholder.feedDesc': 'A description of this feed', + 'jsonToRss.placeholder.siteLink': 'https://example.com', + 'jsonToRss.placeholder.recipeId': 'my-json-feed', + 'jsonToRss.placeholder.internalDesc': 'Notes for yourself 
about this recipe', + 'jsonToRss.placeholder.url': 'https://api.example.com/v1/posts', + 'jsonToRss.placeholder.body': "{ 'foo': 'bar' }", + 'jsonToRss.placeholder.curl': 'curl -X POST ...', + 'jsonToRss.placeholder.items': '.items', + 'jsonToRss.placeholder.title': '.title', + 'jsonToRss.placeholder.titleTemplate': '{{ .Fields.Title | trimSpace }}', + 'jsonToRss.placeholder.link': '.url', + 'jsonToRss.placeholder.linkTemplate': + 'https://some-website.com/article/{{ .Item.id }}', + 'jsonToRss.placeholder.date': '.created_at', + 'jsonToRss.placeholder.dateTemplate': '{{ .Fields.Date }}', + 'jsonToRss.placeholder.content': '.content', + 'jsonToRss.placeholder.contentTemplate': + '{{ default .Fields.Description "No summary" }}', +}; diff --git a/web/admin/src/locale/en-US/menu.ts b/web/admin/src/locale/en-US/menu.ts index 4aba7c4c..9d1b637b 100644 --- a/web/admin/src/locale/en-US/menu.ts +++ b/web/admin/src/locale/en-US/menu.ts @@ -31,7 +31,7 @@ export default { 'menu.feedCompare': 'Feed Compare', 'menu.feedViewer': 'RSS Viewer', 'menu.rssGenerator': 'HTML to RSS', - 'menu.curlToRss': 'Curl to RSS', + 'menu.jsonToRss': 'JSON to RSS', 'menu.searchToRss': 'Search to RSS', 'menu.llmDebug': 'LLM Debug', 'menu.changePassword': 'Change Password', diff --git a/web/admin/src/locale/zh-CN.ts b/web/admin/src/locale/zh-CN.ts index 7ed243c4..982b05b6 100644 --- a/web/admin/src/locale/zh-CN.ts +++ b/web/admin/src/locale/zh-CN.ts @@ -13,7 +13,7 @@ import localeFeedViewer from '@/locale/zh-CN/feedViewer'; import localeLlmDebug from '@/locale/zh-CN/llmDebug'; import localeUrlGenerator from '@/locale/zh-CN/urlGenerator'; import localeHtmlToRss from '@/locale/zh-CN/htmlToRss'; -import localeCurlToRss from '@/locale/zh-CN/curlToRss'; +import localeJsonToRss from '@/locale/zh-CN/jsonToRss'; import localeSearchToRss from '@/locale/zh-CN/searchToRss'; import localeDependencyService from '@/locale/zh-CN/dependencyService'; import localeHealth from '@/locale/zh-CN/health'; @@ -34,7 +34,7 @@ 
export default { ...localeLlmDebug, ...localeUrlGenerator, ...localeHtmlToRss, - ...localeCurlToRss, + ...localeJsonToRss, ...localeSearchToRss, ...localeDependencyService, ...localeHealth, diff --git a/web/admin/src/locale/zh-CN/curlToRss.ts b/web/admin/src/locale/zh-CN/curlToRss.ts deleted file mode 100644 index 8d1b5e76..00000000 --- a/web/admin/src/locale/zh-CN/curlToRss.ts +++ /dev/null @@ -1,98 +0,0 @@ -export default { - 'curlToRss.title': 'Curl 转 RSS', - 'curlToRss.description': - '通过字段提取规则和可选模板,从任何 JSON API 生成 RSS 订阅源。', - 'curlToRss.step.requestConfig': '请求配置', - 'curlToRss.step.requestConfig.desc': '配置 JSON 源', - 'curlToRss.step.parsingRules': '解析规则', - 'curlToRss.step.parsingRules.desc': '定义字段提取和结果模板', - 'curlToRss.step.feedMetadata': 'Feed 元数据', - 'curlToRss.step.feedMetadata.desc': '设置订阅详情', - 'curlToRss.step.saveRecipe': '保存配方', - 'curlToRss.step.saveRecipe.desc': '保存为自定义配方', - 'curlToRss.step1.alert': - '配置用于获取 JSON 数据的 HTTP 请求。您可以从 cURL 命令导入。', - 'curlToRss.step1.curlCommand': 'Curl 命令 (可选 - 粘贴到此处并点击导入)', - 'curlToRss.step1.import': '导入', - 'curlToRss.step1.method': '方法', - 'curlToRss.step1.url': 'URL', - 'curlToRss.step1.headers': '请求头 (Headers)', - 'curlToRss.step1.requestBody': '请求体 (Body)', - 'curlToRss.step1.add': '添加', - 'curlToRss.step1.fetchAndNext': '获取并下一步', - 'curlToRss.step2.responseJson': '响应 JSON', - 'curlToRss.step2.alert': - '先用 jq 选择器提取字段,再按需用模板加工最终结果。列表选择器支持 .data.items 或 .data.items[]。', - 'curlToRss.step2.iteration': '迭代', - 'curlToRss.step2.itemsIterator': '条目迭代器 (例如 .data.items 或 .items[])', - 'curlToRss.step2.itemFields': '条目字段', - 'curlToRss.step2.titleSelector': '标题提取规则', - 'curlToRss.step2.titleTemplate': '标题模板(可选)', - 'curlToRss.step2.linkSelector': '链接提取规则', - 'curlToRss.step2.linkTemplate': '链接模板(可选)', - 'curlToRss.step2.dateSelector': '日期提取规则', - 'curlToRss.step2.dateTemplate': '日期模板(可选)', - 'curlToRss.step2.contentSelector': '内容/摘要提取规则', - 'curlToRss.step2.contentTemplate': '内容/摘要模板(可选)', - 
'curlToRss.step2.templateHelpTitle': '模板说明', - 'curlToRss.step2.templateHelpDesc': - '提取规则负责从当前条目取值,模板负责拼接、补全或清理最终文本。模板可访问当前条目对象和已提取字段。', - 'curlToRss.step2.templateExamples': - '示例:\n链接拼接:https://some-website.com/article/{{ .Item.id }}\n清理标题:{{ .Fields.Title | trimSpace }}\n默认摘要:{{ default .Fields.Description "暂无摘要" }}', - 'curlToRss.step2.previewResults': '预览结果 ({count})', - 'curlToRss.step2.previewPlaceholder': '预览结果将出现在这里', - 'curlToRss.step2.previewPlaceholder.help': '请配置上方选择器并点击“运行预览”', - 'curlToRss.step2.runPreview': '运行预览', - 'curlToRss.step2.nextStep': '下一步', - 'curlToRss.step3.alert': '成功提取 {count} 个条目!现在配置 Feed 元数据。', - 'curlToRss.step3.feedTitle': 'Feed 标题', - 'curlToRss.step3.feedDescription': 'Feed 描述', - 'curlToRss.step3.siteLink': '站点链接', - 'curlToRss.step3.authorName': '作者名称', - 'curlToRss.step3.authorEmail': '作者邮箱', - 'curlToRss.step4.reviewAndSave': '检查并保存', - 'curlToRss.step4.summary': '摘要', - 'curlToRss.step4.sourceUrl': '源 URL', - 'curlToRss.step4.itemCount': '条目数量', - 'curlToRss.step4.recipeId': '配方 ID (URL 路径)', - 'curlToRss.step4.recipeId.help': '这将是 URL 中的唯一标识符。', - 'curlToRss.step4.internalDescription': '内部描述', - 'curlToRss.step4.confirmAndSave': '确认并保存配方', - 'curlToRss.common.back': '返回', - 'curlToRss.common.next': '下一步', - 'curlToRss.msg.enterCurl': '请输入 curl 命令', - 'curlToRss.msg.curlParsed': 'Curl 解析成功', - 'curlToRss.msg.urlRequired': 'URL 是必填项', - 'curlToRss.msg.fetched': '获取成功', - 'curlToRss.msg.invalidJson': '响应不是有效的 JSON', - 'curlToRss.msg.emptyResponse': '响应为空', - 'curlToRss.msg.iteratorRequired': '条目迭代器选择器是必填项', - 'curlToRss.msg.titleRequired': '标题选择器是必填项', - 'curlToRss.msg.noItems': '当前选择器未找到任何条目', - 'curlToRss.msg.parsedItems': '解析了 {count} 个条目', - 'curlToRss.msg.feedTitleRequired': 'Feed 标题是必填项', - 'curlToRss.msg.recipeIdRequired': '配方 ID 是必填项', - 'curlToRss.msg.saved': '配方保存成功!', - 'curlToRss.msg.saveFailed': '保存失败: {msg}', - 'curlToRss.placeholder.key': 'Key', - 'curlToRss.placeholder.value': 'Value', - 
'curlToRss.placeholder.feedTitle': '我的订阅源', - 'curlToRss.placeholder.feedDesc': '此订阅源的描述', - 'curlToRss.placeholder.siteLink': 'https://example.com', - 'curlToRss.placeholder.recipeId': 'my-json-feed', - 'curlToRss.placeholder.internalDesc': '关于此配方的备注', - 'curlToRss.placeholder.url': 'https://api.example.com/v1/posts', - 'curlToRss.placeholder.body': "{ 'foo': 'bar' }", - 'curlToRss.placeholder.curl': 'curl -X POST ...', - 'curlToRss.placeholder.items': '.items', - 'curlToRss.placeholder.title': '.title', - 'curlToRss.placeholder.titleTemplate': '{{ .Fields.Title | trimSpace }}', - 'curlToRss.placeholder.link': '.url', - 'curlToRss.placeholder.linkTemplate': - 'https://some-website.com/article/{{ .Item.id }}', - 'curlToRss.placeholder.date': '.created_at', - 'curlToRss.placeholder.dateTemplate': '{{ .Fields.Date }}', - 'curlToRss.placeholder.content': '.content', - 'curlToRss.placeholder.contentTemplate': - '{{ default .Fields.Description "暂无摘要" }}', -}; diff --git a/web/admin/src/locale/zh-CN/health.ts b/web/admin/src/locale/zh-CN/health.ts index 85beb527..9af622c9 100644 --- a/web/admin/src/locale/zh-CN/health.ts +++ b/web/admin/src/locale/zh-CN/health.ts @@ -6,4 +6,5 @@ export default { 'health.missing': '缺失', 'health.noData': '暂无分析数据,请点击分析按钮开始。', 'health.fetchError': '获取依赖健康状态失败', + 'health.missingCrafts': '缺失的 Crafts', }; diff --git a/web/admin/src/locale/zh-CN/jsonToRss.ts b/web/admin/src/locale/zh-CN/jsonToRss.ts new file mode 100644 index 00000000..f6df04e6 --- /dev/null +++ b/web/admin/src/locale/zh-CN/jsonToRss.ts @@ -0,0 +1,98 @@ +export default { + 'jsonToRss.title': 'JSON 转 RSS', + 'jsonToRss.description': + '通过字段提取规则和可选模板,从任何 JSON API 生成 RSS 订阅源。', + 'jsonToRss.step.requestConfig': '请求配置', + 'jsonToRss.step.requestConfig.desc': '配置 JSON 源', + 'jsonToRss.step.parsingRules': '解析规则', + 'jsonToRss.step.parsingRules.desc': '定义字段提取和结果模板', + 'jsonToRss.step.feedMetadata': 'Feed 元数据', + 'jsonToRss.step.feedMetadata.desc': '设置订阅详情', + 
'jsonToRss.step.saveRecipe': '保存配方', + 'jsonToRss.step.saveRecipe.desc': '保存为自定义配方', + 'jsonToRss.step1.alert': + '配置用于获取 JSON 数据的 HTTP 请求。您可以从 cURL 命令导入。', + 'jsonToRss.step1.curlCommand': 'cURL 命令 (可选 - 粘贴到此处并点击导入)', + 'jsonToRss.step1.import': '导入', + 'jsonToRss.step1.method': '方法', + 'jsonToRss.step1.url': 'URL', + 'jsonToRss.step1.headers': '请求头 (Headers)', + 'jsonToRss.step1.requestBody': '请求体 (Body)', + 'jsonToRss.step1.add': '添加', + 'jsonToRss.step1.fetchAndNext': '获取并下一步', + 'jsonToRss.step2.responseJson': '响应 JSON', + 'jsonToRss.step2.alert': + '先用 jq 选择器提取字段,再按需用模板加工最终结果。列表选择器支持 .data.items 或 .data.items[]。', + 'jsonToRss.step2.iteration': '迭代', + 'jsonToRss.step2.itemsIterator': '条目迭代器 (例如 .data.items 或 .items[])', + 'jsonToRss.step2.itemFields': '条目字段', + 'jsonToRss.step2.titleSelector': '标题提取规则', + 'jsonToRss.step2.titleTemplate': '标题模板(可选)', + 'jsonToRss.step2.linkSelector': '链接提取规则', + 'jsonToRss.step2.linkTemplate': '链接模板(可选)', + 'jsonToRss.step2.dateSelector': '日期提取规则', + 'jsonToRss.step2.dateTemplate': '日期模板(可选)', + 'jsonToRss.step2.contentSelector': '内容/摘要提取规则', + 'jsonToRss.step2.contentTemplate': '内容/摘要模板(可选)', + 'jsonToRss.step2.templateHelpTitle': '模板说明', + 'jsonToRss.step2.templateHelpDesc': + '提取规则负责从当前条目取值,模板负责拼接、补全或清理最终文本。模板可访问当前条目对象和已提取字段。', + 'jsonToRss.step2.templateExamples': + '示例:\n链接拼接:https://some-website.com/article/{{ .Item.id }}\n清理标题:{{ .Fields.Title | trimSpace }}\n默认摘要:{{ default .Fields.Description "暂无摘要" }}', + 'jsonToRss.step2.previewResults': '预览结果 ({count})', + 'jsonToRss.step2.previewPlaceholder': '预览结果将出现在这里', + 'jsonToRss.step2.previewPlaceholder.help': '请配置上方选择器并点击“运行预览”', + 'jsonToRss.step2.runPreview': '运行预览', + 'jsonToRss.step2.nextStep': '下一步', + 'jsonToRss.step3.alert': '成功提取 {count} 个条目!现在配置 Feed 元数据。', + 'jsonToRss.step3.feedTitle': 'Feed 标题', + 'jsonToRss.step3.feedDescription': 'Feed 描述', + 'jsonToRss.step3.siteLink': '站点链接', + 'jsonToRss.step3.authorName': '作者名称', + 'jsonToRss.step3.authorEmail': '作者邮箱', 
+ 'jsonToRss.step4.reviewAndSave': '检查并保存', + 'jsonToRss.step4.summary': '摘要', + 'jsonToRss.step4.sourceUrl': '源 URL', + 'jsonToRss.step4.itemCount': '条目数量', + 'jsonToRss.step4.recipeId': '配方 ID (URL 路径)', + 'jsonToRss.step4.recipeId.help': '这将是 URL 中的唯一标识符。', + 'jsonToRss.step4.internalDescription': '内部描述', + 'jsonToRss.step4.confirmAndSave': '确认并保存配方', + 'jsonToRss.common.back': '返回', + 'jsonToRss.common.next': '下一步', + 'jsonToRss.msg.enterCurl': '请输入 cURL 命令', + 'jsonToRss.msg.curlParsed': 'cURL 解析成功', + 'jsonToRss.msg.urlRequired': 'URL 是必填项', + 'jsonToRss.msg.fetched': '获取成功', + 'jsonToRss.msg.invalidJson': '响应不是有效的 JSON', + 'jsonToRss.msg.emptyResponse': '响应为空', + 'jsonToRss.msg.iteratorRequired': '条目迭代器选择器是必填项', + 'jsonToRss.msg.titleRequired': '标题选择器是必填项', + 'jsonToRss.msg.noItems': '当前选择器未找到任何条目', + 'jsonToRss.msg.parsedItems': '解析了 {count} 个条目', + 'jsonToRss.msg.feedTitleRequired': 'Feed 标题是必填项', + 'jsonToRss.msg.recipeIdRequired': '配方 ID 是必填项', + 'jsonToRss.msg.saved': '配方保存成功!', + 'jsonToRss.msg.saveFailed': '保存失败: {msg}', + 'jsonToRss.placeholder.key': 'Key', + 'jsonToRss.placeholder.value': 'Value', + 'jsonToRss.placeholder.feedTitle': '我的订阅源', + 'jsonToRss.placeholder.feedDesc': '此订阅源的描述', + 'jsonToRss.placeholder.siteLink': 'https://example.com', + 'jsonToRss.placeholder.recipeId': 'my-json-feed', + 'jsonToRss.placeholder.internalDesc': '关于此配方的备注', + 'jsonToRss.placeholder.url': 'https://api.example.com/v1/posts', + 'jsonToRss.placeholder.body': "{ 'foo': 'bar' }", + 'jsonToRss.placeholder.curl': 'curl -X POST ...', + 'jsonToRss.placeholder.items': '.items', + 'jsonToRss.placeholder.title': '.title', + 'jsonToRss.placeholder.titleTemplate': '{{ .Fields.Title | trimSpace }}', + 'jsonToRss.placeholder.link': '.url', + 'jsonToRss.placeholder.linkTemplate': + 'https://some-website.com/article/{{ .Item.id }}', + 'jsonToRss.placeholder.date': '.created_at', + 'jsonToRss.placeholder.dateTemplate': '{{ .Fields.Date }}', + 'jsonToRss.placeholder.content': 
'.content', + 'jsonToRss.placeholder.contentTemplate': + '{{ default .Fields.Description "暂无摘要" }}', +}; diff --git a/web/admin/src/locale/zh-CN/menu.ts b/web/admin/src/locale/zh-CN/menu.ts index ca775663..36863a3c 100644 --- a/web/admin/src/locale/zh-CN/menu.ts +++ b/web/admin/src/locale/zh-CN/menu.ts @@ -31,7 +31,7 @@ export default { 'menu.feedCompare': 'Feed 对比', 'menu.feedViewer': 'RSS 预览', 'menu.rssGenerator': 'HTML 转 RSS', - 'menu.curlToRss': 'Curl 转 RSS', + 'menu.jsonToRss': 'JSON 转 RSS', 'menu.searchToRss': '搜索 转 RSS', 'menu.llmDebug': 'LLM 调试', 'menu.adCheckDebug': '广告软文检测', diff --git a/web/admin/src/router/routes/modules/worktable.ts b/web/admin/src/router/routes/modules/worktable.ts index 76dc5b59..63fb3702 100644 --- a/web/admin/src/router/routes/modules/worktable.ts +++ b/web/admin/src/router/routes/modules/worktable.ts @@ -12,25 +12,26 @@ const WORKTABLE: AppRouteRecordRaw = { order: 1, }, children: [ - { - path: 'topic_feed', - name: 'TopicFeed', - component: () => import('@/views/dashboard/topic_feed/topic_feed.vue'), - meta: { - requiresAuth: true, - locale: 'menu.topicFeed', - }, - }, - { - path: 'topic_feed/:id', - name: 'TopicFeedDetail', - component: () => import('@/views/dashboard/topic_feed/detail.vue'), - meta: { - requiresAuth: true, - locale: 'menu.topicFeed', - hideInMenu: true, - }, - }, + // TopicFeed 功能当前仍在开发完善中,先隐藏入口;待功能 ready 后再重新开放。 + // { + // path: 'topic_feed', + // name: 'TopicFeed', + // component: () => import('@/views/dashboard/topic_feed/topic_feed.vue'), + // meta: { + // requiresAuth: true, + // locale: 'menu.topicFeed', + // }, + // }, + // { + // path: 'topic_feed/:id', + // name: 'TopicFeedDetail', + // component: () => import('@/views/dashboard/topic_feed/detail.vue'), + // meta: { + // requiresAuth: true, + // locale: 'menu.topicFeed', + // hideInMenu: true, + // }, + // }, { path: 'custom_recipe', name: 'CustomRecipe', @@ -70,11 +71,11 @@ const WORKTABLE: AppRouteRecordRaw = { }, }, { - path: 'curl-to-rss', - name: 
'CurlToRss', + path: 'json-to-rss', + name: 'JsonToRss', component: () => import('@/views/dashboard/curl_to_rss/curl_to_rss.vue'), meta: { - locale: 'menu.curlToRss', + locale: 'menu.jsonToRss', requiresAuth: true, }, }, diff --git a/web/admin/src/views/dashboard/curl_to_rss/curl_to_rss.vue b/web/admin/src/views/dashboard/curl_to_rss/curl_to_rss.vue index 5ed1d72c..d509b419 100644 --- a/web/admin/src/views/dashboard/curl_to_rss/curl_to_rss.vue +++ b/web/admin/src/views/dashboard/curl_to_rss/curl_to_rss.vue @@ -1,8 +1,8 @@