diff --git a/acceptance/README.md b/acceptance/README.md index 132e806..a81d887 100644 --- a/acceptance/README.md +++ b/acceptance/README.md @@ -33,5 +33,6 @@ | Run Cache | `run-cache.feature` | | Source Intake and Fetching v0.1 | `source-intake-and-fetching.feature` | | Source Discovery v0.1 | `source-discovery.feature` | +| Manual Source URL Intake v0.1 | `manual-source-url-intake.feature` | - source-quality-and-freshness.feature: Source quality/freshness indicators, unknown caveats, and report summary behavior diff --git a/acceptance/manual-source-url-intake.feature b/acceptance/manual-source-url-intake.feature new file mode 100644 index 0000000..1c7a209 --- /dev/null +++ b/acceptance/manual-source-url-intake.feature @@ -0,0 +1,25 @@ +Feature: Manual Source URL Intake + + Scenario: User starts an investigation with optional source URLs + Given the user is on the TraceMap landing page + When the user enters a research topic + And the user enters one or more source URLs + And the user starts the investigation + Then a new analysis run should be created + And manual source URLs should be passed into source intake + And manual source URLs should be prioritized over discovered sources + And the run page should display the investigation result + + Scenario: User enters duplicate source URLs + Given the user is on the TraceMap landing page + When the user enters duplicate source URLs + And the user starts the investigation + Then duplicate URLs should be removed before source intake + And the run should not fail because of duplicates + + Scenario: User enters an invalid source URL + Given the user is on the TraceMap landing page + When the user enters an invalid source URL + And the user starts the investigation + Then the form should show a clear validation error + And no broken analysis run should be created diff --git a/e2e/home.spec.ts b/e2e/home.spec.ts index d9ca661..707ef12 100644 --- a/e2e/home.spec.ts +++ b/e2e/home.spec.ts @@ -14,4 +14,5 @@ test("home page 
and health endpoint are reachable", async ({ page, request }) => await expect(page.getByRole("heading", { level: 1, name: "TraceMap" })).toBeVisible(); await expect(page.getByRole("button", { name: "Start Investigation" })).toBeVisible(); + await expect(page.getByTestId("manual-source-urls-input")).toBeVisible(); }); diff --git a/specs/README.md b/specs/README.md index 9e08cd9..5007536 100644 --- a/specs/README.md +++ b/specs/README.md @@ -44,5 +44,6 @@ Each feature spec should describe: | Run Cache | [run-cache.md](./run-cache.md) | | Source Intake and Fetching v0.1 | [source-intake-and-fetching.md](./source-intake-and-fetching.md) | | Source Discovery v0.1 | [source-discovery.md](./source-discovery.md) | +| Manual Source URL Intake v0.1 | [manual-source-url-intake.md](./manual-source-url-intake.md) | - source-quality-and-freshness.md: Source Quality & Freshness Inspector v0.1 (derived quality signals, unknown caveats, report summary) diff --git a/specs/manual-source-url-intake.md b/specs/manual-source-url-intake.md new file mode 100644 index 0000000..28f37a5 --- /dev/null +++ b/specs/manual-source-url-intake.md @@ -0,0 +1,91 @@ +# Manual Source URL Intake v0.1 + +## Purpose + +Allow users to submit optional source URLs from the landing page so Investigation Mission runs can prioritize user-specified references in source intake without changing the core answer/evidence pipeline. + +## User value + +- Users can seed investigations with known high-signal references (official IR, press releases, public docs, papers, government data). +- Evidence Map traceability improves because user-intended sources are treated as first-class candidates. +- Invalid input is blocked early with clear form feedback, preventing broken runs. + +## Scope + +- Add optional multi-line URL input (`sourceUrls`) on landing intake form. +- Parse, validate, normalize, and deduplicate URLs in server action. +- Pass valid manual URLs through run creation options to source intake. 
+- Merge manual URLs with topic-extracted/discovered URLs, prioritizing manual URLs. +- Keep OpenAI provider schema unchanged; continue using existing `sourceCandidates` path. +- Keep existing Source Cache / Fetch Snapshot route for URL resolution. + +## Non-goals + +- RAG, embeddings, reranking, full-text crawling, PDF parsing. +- Background jobs, streaming response. +- New source tables, auth/workspace changes, upload flows. +- Large OpenAI provider schema redesign. + +## Existing implementation constraints + +- `AnalysisRun.question` is not renamed in this slice. +- `question` form field remains required and unchanged. +- Investigation mode selector behavior remains unchanged. +- Existing Evidence Map / Unknown Map / Source Lineage / Briefing Report / Report Export Lite must keep working. + +## Data model strategy + +- No Prisma schema changes and no DB migration. +- Manual URLs are transient form input passed via server-side options. +- Source candidate persistence continues via existing `source_snapshots` and source cache/fetch snapshot linkage. + +## UI requirements + +- Add textarea between Research topic and Investigation depth. +- Label: `Optional source URLs`. +- Help text: `Add one URL per line. TraceMap will prioritize these sources when building the evidence map.` +- Name: `sourceUrls`. +- `data-testid="manual-source-urls-input"`. +- Optional input, empty means existing behavior. +- Validation error can be shown near existing form error region. + +## Server action requirements + +- Read `sourceUrls` from `FormData`. +- Split by line, trim, drop empty lines. +- Validate as absolute `http(s)` URLs. +- Normalize and dedupe before forwarding. +- On invalid line(s), return form error and do not create run. +- Error message: `Source URLs must be valid http(s) URLs, one per line.` + +## Source intake requirements + +- Accept `manualSourceUrls` option in `buildSourceIntakeFromQuestion`. 
+- Merge URL inputs in this precedence order: + 1) manual source URLs, + 2) URLs extracted from question text, + 3) discovery provider URLs. +- Deduplicate by normalized URL while preserving higher-priority origin. +- Invalid URLs should be safely reported into `ignoredUrls` if they still reach intake. + +## Provider requirements + +- OpenAI provider schema remains unchanged. +- Manual URLs are surfaced only via existing `sourceCandidates` context. +- Optional prompt tweak may prefer user-provided candidates, but no large prompt inflation. + +## Cache requirements + +- Manual URLs can change output; avoid stale run-cache reuse. +- For v0.1 safety: skip run-cache lookup/store when `manualSourceUrls` are present. + +## Test requirements + +- Unit tests for manual URL parser/validator normalization + dedupe. +- Server action behavior for valid/invalid/manual-empty paths. +- Source intake merge priority and duplicate removal coverage. +- Existing tests remain green. + +## Acceptance references + +- `acceptance/manual-source-url-intake.feature` diff --git a/src/app/actions/create-run.ts b/src/app/actions/create-run.ts index b408166..a4e8a1e 100644 --- a/src/app/actions/create-run.ts +++ b/src/app/actions/create-run.ts @@ -5,6 +5,7 @@ import { redirect } from "next/navigation"; import { resolveInvestigationMode } from "@/server/analysis/investigation-limits"; import { createAnalysisRunFromProvider } from "@/server/analysis/create-analysis-run-from-provider"; +import { parseManualSourceUrls } from "@/app/actions/manual-source-urls"; export type CreateRunFormState = { error?: string; @@ -24,6 +25,14 @@ export async function createMockRunAction( typeof rawMode === "string" ? 
rawMode : undefined, ); - const runId = await createAnalysisRunFromProvider(raw.trim(), { mode }); + const manualSourceUrlsResult = parseManualSourceUrls(formData.get("sourceUrls")); + if (manualSourceUrlsResult.kind === "error") { + return { error: manualSourceUrlsResult.message }; + } + + const runId = await createAnalysisRunFromProvider(raw.trim(), { + mode, + manualSourceUrls: manualSourceUrlsResult.manualSourceUrls, + }); redirect(`/runs/${runId}` as Route); } diff --git a/src/app/actions/manual-source-urls.ts b/src/app/actions/manual-source-urls.ts new file mode 100644 index 0000000..e031d9f --- /dev/null +++ b/src/app/actions/manual-source-urls.ts @@ -0,0 +1,43 @@ +import { normalizeSourceUrl } from "@/server/analysis/source-url-normalization"; + +export const MANUAL_SOURCE_URLS_ERROR_MESSAGE = + "Source URLs must be valid http(s) URLs, one per line."; + +export type ParseManualSourceUrlsResult = + | { kind: "ok"; manualSourceUrls: string[] } + | { kind: "error"; message: string }; + +/** + * Parses the raw `sourceUrls` form value into a de-duplicated URL list. + * + * Non-string or blank input yields an empty list. Otherwise the text is split + * on line breaks, each line is trimmed, empty lines are dropped, and every + * remaining line goes through `normalizeSourceUrl`. The first line that does + * not normalize to `kind: "ok"` fails the whole parse. + * + * @param raw - Entry read from `FormData`; may be a string, a file, or null. + * @returns `ok` with unique normalized URLs in first-seen order, or `error` + * carrying `MANUAL_SOURCE_URLS_ERROR_MESSAGE`. + */ +export function parseManualSourceUrls(raw: FormDataEntryValue | null): ParseManualSourceUrlsResult { + if (typeof raw !== "string" || raw.trim().length === 0) { + return { kind: "ok", manualSourceUrls: [] }; + } + + const lines = raw + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0); + + // Typed as Set<string> so the spread below is string[]; a bare Set would be Set<unknown>. + const unique = new Set<string>(); + for (const line of lines) { + const normalized = normalizeSourceUrl(line); + if (normalized.kind !== "ok") { + return { kind: "error", message: MANUAL_SOURCE_URLS_ERROR_MESSAGE }; + } + unique.add(normalized.normalizedUrl); + } + + return { kind: "ok", manualSourceUrls: [...unique] }; +} diff --git a/src/features/landing/components/question-intake.tsx b/src/features/landing/components/question-intake.tsx index 6a332fe..9863375 100644 --- a/src/features/landing/components/question-intake.tsx +++ b/src/features/landing/components/question-intake.tsx @@ -48,6 +48,7 @@ export function QuestionIntake() { />

Examples:

+

公式URLを貼ると、根拠確認と出典追跡がしやすくなります。

+ +

+ Add one URL per line. TraceMap will prioritize these sources when building the evidence map. +

+