/// Helpers for turning a user-supplied Volcengine Ark base URL into a concrete endpoint URL.
enum ArkEndpointUtilities {
  /// Route of the OpenAI-compatible chat-completions API, relative to the versioned base path.
  private static let chatCompletionsSuffix = "/chat/completions"

  /// Builds a chat-completions endpoint URL from a user-provided Ark base URL.
  /// Ark commonly uses `https://ark.cn-beijing.volces.com/api/v3`.
  ///
  /// Normalization rules, applied to the path after collapsing duplicate and trailing slashes:
  /// - empty path or `/`            -> `/api/v3/chat/completions`
  /// - path ending in `/chat`       -> `/completions` is appended
  /// - path ending in `/chat/completions` (any prefix) -> left untouched
  /// - anything else (e.g. `/api/v3`, `/v1`) -> `/chat/completions` is appended
  ///
  /// - Parameter baseURL: Base URL as typed by the user; surrounding whitespace is ignored.
  /// - Returns: The absolute endpoint URL, or `nil` when the input is empty, unparsable,
  ///   or lacks an `http`/`https` scheme or a host.
  static func chatCompletionsURL(baseURL: String) -> URL? {
    let trimmed = baseURL.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !trimmed.isEmpty, var components = URLComponents(string: trimmed) else { return nil }

    // A scheme-less input such as "ark.example.com/api/v3" parses as a bare path with no host.
    // Reject it here so the caller reports an invalid endpoint instead of sending a request
    // to a relative URL that can never succeed.
    guard let scheme = components.scheme?.lowercased(), scheme == "http" || scheme == "https",
      let host = components.host, !host.isEmpty
    else { return nil }

    var path = sanitize(components.path)
    if path.isEmpty || path == "/" {
      path = "/api/v3" + chatCompletionsSuffix
    } else if path.hasSuffix("/chat") {
      path += "/completions"
    } else if !path.hasSuffix(chatCompletionsSuffix) {
      // Covers version roots (`/api/v3`, `/v3`, `/v1`) and any other custom base path.
      path += chatCompletionsSuffix
    }
    // Paths that already end in `/chat/completions` fall through unchanged, so the route is
    // never appended twice.

    if !path.hasPrefix("/") {
      path = "/" + path
    }
    components.path = sanitize(path)
    return components.url
  }

  /// Collapses runs of `/` into a single slash and strips trailing slashes (a lone `/` is kept).
  private static func sanitize(_ path: String) -> String {
    guard !path.isEmpty else { return "" }
    var normalized = path
    while normalized.contains("//") {
      normalized = normalized.replacingOccurrences(of: "//", with: "/")
    }
    while normalized.count > 1 && normalized.hasSuffix("/") {
      normalized.removeLast()
    }
    return normalized
  }
}
/// Type-erased `Codable` wrapper used to serialize loosely typed JSON values
/// (the schema dictionary passed as `response_format.json_schema.schema`).
///
/// Supported payloads: nil (`Void` or `NSNull`), `Bool`, `Int`, `Double`, `String`,
/// arrays of supported values and string-keyed dictionaries of supported values.
private struct AnyCodable: Codable {
  /// The wrapped value; `()` stands in for JSON `null`.
  let value: Any

  /// Wraps `value`, storing `()` when it is `nil`.
  init<T>(_ value: T?) {
    self.value = value ?? ()
  }

  /// Decodes a single JSON value, trying the narrowest scalar types first.
  /// - Throws: `DecodingError.dataCorrupted` when the value matches none of the supported shapes.
  init(from decoder: Decoder) throws {
    let container = try decoder.singleValueContainer()
    if container.decodeNil() {
      self.value = ()
    } else if let bool = try? container.decode(Bool.self) {
      self.value = bool
    } else if let int = try? container.decode(Int.self) {
      self.value = int
    } else if let double = try? container.decode(Double.self) {
      self.value = double
    } else if let string = try? container.decode(String.self) {
      self.value = string
    } else if let array = try? container.decode([AnyCodable].self) {
      self.value = array.map { $0.value }
    } else if let dictionary = try? container.decode([String: AnyCodable].self) {
      self.value = dictionary.mapValues { $0.value }
    } else {
      throw DecodingError.dataCorruptedError(
        in: container, debugDescription: "AnyCodable value cannot be decoded")
    }
  }

  /// Encodes the wrapped value as a single JSON value.
  /// - Throws: `EncodingError.invalidValue` when the wrapped value has an unsupported type.
  func encode(to encoder: Encoder) throws {
    var container = encoder.singleValueContainer()
    switch value {
    case is Void, is NSNull:
      // `JSONSerialization` represents JSON null as `NSNull`.
      try container.encodeNil()
    case let bool as Bool where Self.isGenuineBoolean(value):
      try container.encode(bool)
    case let int as Int:
      try container.encode(int)
    case let double as Double:
      try container.encode(double)
    case let string as String:
      try container.encode(string)
    case let array as [Any]:
      try container.encode(array.map { AnyCodable($0) })
    case let dictionary as [String: Any]:
      try container.encode(dictionary.mapValues { AnyCodable($0) })
    default:
      throw EncodingError.invalidValue(
        value,
        EncodingError.Context(
          codingPath: container.codingPath, debugDescription: "AnyCodable value cannot be encoded"
        ))
    }
  }

  /// Reports whether `value` is a real boolean rather than a number that merely casts to `Bool`.
  ///
  /// On Apple platforms an `NSNumber` holding 0 or 1 (what `JSONSerialization` yields for the
  /// JSON numbers `0`/`1`) satisfies `as Bool`, which would turn e.g. `"minItems": 1` into
  /// `"minItems": true`. Only `NSNumber`s backed by `CFBoolean` are treated as booleans there.
  private static func isGenuineBoolean(_ value: Any) -> Bool {
    #if canImport(ObjectiveC)
      if let number = value as? NSNumber {
        return CFGetTypeID(number) == CFBooleanGetTypeID()
      }
    #endif
    return value is Bool
  }
}
/// Body of a chat message: either a plain string or a list of typed content parts.
/// Both shapes are written to / read from a single-value container, so the JSON is
/// either a string or an array with no wrapping object.
private enum ChatMessageContent: Codable {
  case text(String)
  case parts([ChatContentPart])

  /// Accepts a JSON string first, then an array of content parts.
  /// - Throws: `DecodingError.typeMismatch` when the value is neither shape.
  init(from decoder: Decoder) throws {
    let single = try decoder.singleValueContainer()
    if let plain = try? single.decode(String.self) {
      self = .text(plain)
    } else if let segments = try? single.decode([ChatContentPart].self) {
      self = .parts(segments)
    } else {
      throw DecodingError.typeMismatch(
        ChatMessageContent.self,
        DecodingError.Context(
          codingPath: decoder.codingPath, debugDescription: "Unsupported content shape")
      )
    }
  }

  /// Writes the associated value directly, without an enclosing keyed container.
  func encode(to encoder: Encoder) throws {
    var single = encoder.singleValueContainer()
    switch self {
    case .parts(let segments):
      try single.encode(segments)
    case .text(let plain):
      try single.encode(plain)
    }
  }
}
/// Sends `prompt` as a single user message to the chat-completions endpoint.
///
/// - Parameter prompt: Plain-text prompt; it is also recorded as the log input.
/// - Returns: The reply with surrounding whitespace and newlines removed, plus an `LLMCall`
///   that records the start time, the elapsed time and the untrimmed reply.
/// - Throws: Whatever `callChatCompletions` throws.
func generateText(prompt: String) async throws -> (text: String, log: LLMCall) {
  let startedAt = Date()
  let userMessage = ChatMessage(role: "user", content: .text(prompt))

  let rawReply = try await callChatCompletions(
    messages: [userMessage],
    operation: "generate_text",
    batchId: nil,
    includeRequestBodyInLog: true
  )

  // Latency is measured after the network call returns, before trimming.
  let elapsed = Date().timeIntervalSince(startedAt)
  let callLog = LLMCall(timestamp: startedAt, latency: elapsed, input: prompt, output: rawReply)
  let trimmedReply = rawReply.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
  return (trimmedReply, callLog)
}
0)_\(UUID().uuidString).mp4") + defer { try? fileManager.removeItem(at: tempURL) } + + do { + let videoService = VideoProcessingService() + try await videoService.generateVideoFromScreenshots( + screenshots: sortedScreenshots, + outputURL: tempURL, + fps: 1, + useCompressedTimeline: true + ) + } catch { + throw NSError( + domain: "DoubaoProvider", code: 11, + userInfo: [ + NSLocalizedDescriptionKey: + "Failed to composite screenshots into video: \(error.localizedDescription)" + ]) + } + + let videoData = try Data(contentsOf: tempURL) + let videoDataURL = "data:video/mp4;base64,\(videoData.base64EncodedString())" + + // Format compressed video duration EXACTLY like GeminiDirectProvider. + let durationMinutes = Int(compressedVideoDuration / 60) + let durationSeconds = Int(compressedVideoDuration.truncatingRemainder(dividingBy: 60)) + let durationString = String(format: "%02d:%02d", durationMinutes, durationSeconds) + + let finalTranscriptionPrompt = LLMPromptTemplates.screenRecordingTranscriptionPrompt( + durationString: durationString, schema: LLMSchema.screenRecordingTranscriptionSchema) + + let maxRetries = 3 + var attempt = 0 + var lastError: Error? + let callGroupId = UUID().uuidString + + while attempt < maxRetries { + do { + let parts: [ChatContentPart] = [ + .text(finalTranscriptionPrompt), + .videoDataURL(videoDataURL, fps: 1), + ] + + let output = try await callChatCompletions( + messages: [ChatMessage(role: "user", content: .parts(parts))], + operation: "transcribe_screenshots", + batchId: batchId, + attempt: attempt + 1, + callGroupId: callGroupId, + includeRequestBodyInLog: false, + schema: ( + name: "screen_recording_transcription", + definition: LLMSchema.screenRecordingTranscriptionSchema + ) + ) + + let transcripts = try LLMTranscriptUtilities.decodeTranscriptChunks( + from: output, allowBracketFallback: true) + + // Convert transcripts to observations, expanding compressed video timestamps back into real time. 
/// Turns observations into timeline activity cards, retrying when the model output is
/// unparsable or fails validation.
///
/// Each attempt sends the prompt, parses the reply with `parseCards`, normalizes categories
/// and runs `validateTimeCoverage` / `validateTimeline`. When validation fails, the next
/// attempt re-sends the base prompt with the validation errors appended. Up to four attempts
/// are made, with a one-second pause between them.
///
/// - Parameters:
///   - observations: Observations rendered into the prompt as `[start - end]: text` lines.
///   - context: Supplies the existing cards and the category descriptors.
///   - batchId: Forwarded to `callChatCompletions` for logging.
/// - Returns: The validated cards and an `LLMCall` covering the whole retry sequence; its
///   input is the prompt of the attempt that succeeded.
/// - Throws: The error of the most recent failed attempt (network, parsing or validation),
///   or a generic `DoubaoProvider` error (code 121) when no cards were produced and no
///   failure was recorded.
func generateActivityCards(
  observations: [Observation], context: ActivityGenerationContext, batchId: Int64?
) async throws -> (cards: [ActivityCardData], log: LLMCall) {
  let callStart = Date()

  let transcriptText = observations.map { obs in
    let startTime = formatTimestampForPrompt(obs.startTs)
    let endTime = formatTimestampForPrompt(obs.endTs)
    return "[" + startTime + " - " + endTime + "]: " + obs.observation
  }.joined(separator: "\n")

  let encoder = JSONEncoder()
  encoder.outputFormatting = .prettyPrinted
  // Best effort: fall back to an empty JSON array if the existing cards cannot be encoded.
  let existingCardsJSON = (try? encoder.encode(context.existingCards)) ?? Data("[]".utf8)
  let existingCardsString = String(data: existingCardsJSON, encoding: .utf8) ?? "[]"
  let promptSections = VideoPromptSections(overrides: VideoPromptPreferences.load())

  let languageBlock =
    LLMOutputLanguagePreferences.languageInstruction(forJSON: true)
    .map { "\n\n\($0)" } ?? ""

  let basePrompt = LLMPromptTemplates.activityCardsPrompt(
    existingCardsString: existingCardsString,
    transcriptText: transcriptText,
    categoriesSection: categoriesSection(from: context.categories),
    promptSections: promptSections,
    languageBlock: languageBlock, schema: LLMSchema.activityCardsSchema
  )

  let maxRetries = 4
  var attempt = 0
  var lastError: Error?
  var actualPromptUsed = basePrompt
  var finalResponse = ""
  var finalCards: [ActivityCardData] = []
  let callGroupId = UUID().uuidString

  while attempt < maxRetries {
    do {
      // No trailing comma after the last argument: that syntax needs Swift 6.1 or newer.
      let output = try await callChatCompletions(
        messages: [ChatMessage(role: "user", content: .text(actualPromptUsed))],
        operation: "generate_activity_cards",
        batchId: batchId,
        attempt: attempt + 1,
        callGroupId: callGroupId,
        includeRequestBodyInLog: true,
        schema: (name: "activity_cards", definition: LLMSchema.activityCardsSchema)
      )

      let cards = try parseCards(from: output)
      let normalized = normalizeCards(cards, descriptors: context.categories)
      let (coverageValid, coverageError) = validateTimeCoverage(
        existingCards: context.existingCards, newCards: normalized)
      let (durationValid, durationError) = validateTimeline(normalized)

      if coverageValid && durationValid {
        finalResponse = output
        finalCards = normalized
        break
      }

      // Record why this attempt was rejected. Without this, exhausting the retries would
      // surface either a stale error from an earlier attempt or a reason-less generic error.
      let failureReasons = [
        coverageValid ? nil : coverageError,
        durationValid ? nil : durationError,
      ].compactMap { $0 }
      lastError = NSError(
        domain: "DoubaoProvider", code: 121,
        userInfo: [
          NSLocalizedDescriptionKey:
            "Activity card generation failed validation on attempt \(attempt + 1) of \(maxRetries): "
            + failureReasons.joined(separator: " | ")
        ])

      var errorMessages: [String] = []
      if !coverageValid, let coverageError {
        errorMessages.append(
          """
          TIME COVERAGE ERROR:
          \(coverageError)

          You MUST ensure your output cards collectively cover ALL time periods from the input cards. Do not drop any time segments.
          """)
      }

      if !durationValid, let durationError {
        errorMessages.append(
          """
          DURATION ERROR:
          \(durationError)

          REMINDER: All cards except the last one must be at least 10 minutes long. Please merge short activities into longer, more meaningful cards that tell a coherent story.
          """)
      }

      // The retry prompt is always rebuilt from the base prompt, so feedback from earlier
      // attempts does not accumulate.
      actualPromptUsed =
        basePrompt + """


          PREVIOUS ATTEMPT FAILED - CRITICAL REQUIREMENTS NOT MET:

          \(errorMessages.joined(separator: "\n\n"))

          Please fix these issues and ensure your output meets all requirements.
          """

      if attempt < maxRetries - 1 {
        try await Task.sleep(nanoseconds: UInt64(1.0 * 1_000_000_000))
      }
    } catch {
      lastError = error
      if attempt >= maxRetries - 1 { break }
      try await Task.sleep(nanoseconds: UInt64(1.0 * 1_000_000_000))
    }

    attempt += 1
  }

  guard !finalCards.isEmpty else {
    throw lastError
      ?? NSError(
        domain: "DoubaoProvider", code: 121,
        userInfo: [
          NSLocalizedDescriptionKey:
            "Activity card generation failed after \(maxRetries) attempts"
        ])
  }

  let log = LLMCall(
    timestamp: callStart, latency: Date().timeIntervalSince(callStart), input: actualPromptUsed,
    output: finalResponse)
  return (finalCards, log)
}
[String: Any] + else { + assertionFailure("Hardcoded LLMSchema is not valid JSON: \(schema.name)") + return nil + } + return ResponseFormat( + type: "json_schema", + jsonSchema: JSONSchema(name: schema.name, schema: schemaJSON, strict: true)) + } + + let body = ChatCompletionRequest( + model: modelId, + messages: messages, + temperature: 0.3, + maxTokens: 8192, + responseFormat: responseFormat + ) + let encoder = JSONEncoder() + let bodyData = try encoder.encode(body) + request.httpBody = bodyData + + let ctx = LLMCallContext( + batchId: batchId, + callGroupId: callGroupId, + attempt: attempt, + provider: "doubao", + model: modelId, + operation: operation, + requestMethod: request.httpMethod, + requestURL: request.url, + requestHeaders: request.allHTTPHeaderFields, + requestBody: includeRequestBodyInLog ? bodyData : nil, + startedAt: startedAt + ) + + do { + let (data, response) = try await URLSession.shared.data(for: request) + let finishedAt = Date() + guard let http = response as? HTTPURLResponse else { + LLMLogger.logFailure( + ctx: ctx, http: nil, finishedAt: finishedAt, errorDomain: "DoubaoProvider", errorCode: -1, + errorMessage: "Non-HTTP response") + throw NSError( + domain: "DoubaoProvider", code: -1, + userInfo: [NSLocalizedDescriptionKey: "Non-HTTP response"]) + } + + let responseHeaders: [String: String] = http.allHeaderFields.reduce(into: [:]) { acc, kv in + if let k = kv.key as? String, let v = kv.value as? CustomStringConvertible { + acc[k] = v.description + } + } + let httpInfo = LLMHTTPInfo( + httpStatus: http.statusCode, responseHeaders: responseHeaders, responseBody: data) + + guard http.statusCode == 200 else { + let bodyText = String(data: data, encoding: .utf8) ?? 
"" + LLMLogger.logFailure( + ctx: ctx, http: httpInfo, finishedAt: finishedAt, errorDomain: "DoubaoProvider", + errorCode: http.statusCode, errorMessage: bodyText) + throw NSError( + domain: "DoubaoProvider", code: http.statusCode, + userInfo: [NSLocalizedDescriptionKey: "HTTP \(http.statusCode): \(bodyText)"]) + } + + let decoded = (try? JSONDecoder().decode(ChatCompletionResponse.self, from: data)) + if let apiError = decoded?.error, let message = apiError.message { + LLMLogger.logFailure( + ctx: ctx, http: httpInfo, finishedAt: finishedAt, errorDomain: "DoubaoProvider", + errorCode: http.statusCode, errorMessage: message) + throw NSError( + domain: "DoubaoProvider", code: http.statusCode, + userInfo: [NSLocalizedDescriptionKey: message]) + } + + let text = + decoded?.choices?.first?.message.content + ?? String(data: data, encoding: .utf8) + ?? "" + LLMLogger.logSuccess(ctx: ctx, http: httpInfo, finishedAt: finishedAt) + return text + } catch { + let finishedAt = Date() + if (error as NSError).domain != "DoubaoProvider" { + LLMLogger.logFailure( + ctx: ctx, + http: nil, + finishedAt: finishedAt, + errorDomain: (error as NSError).domain, + errorCode: (error as NSError).code, + errorMessage: (error as NSError).localizedDescription + ) + } + throw error + } + } + + // MARK: - Parsing helpers + + private func parseCards(from output: String) throws -> [ActivityCardData] { + let cleaned = + output + .replacingOccurrences(of: "```json", with: "") + .replacingOccurrences(of: "```", with: "") + .trimmingCharacters(in: .whitespacesAndNewlines) + + if let data = cleaned.data(using: .utf8), + let cards = try? JSONDecoder().decode([ActivityCardData].self, from: data) + { + return cards + } + + // Bracket-balance extraction: take the last ']' and find matching '['. + func findBalancedArrayStart(_ str: String, endBracket: String.Index) -> String.Index? 
/// Renders the category descriptors as the "USER CATEGORIES" section of the prompt.
///
/// - Parameter descriptors: Categories to list, in display order.
/// - Returns: A fixed one-line notice when `descriptors` is empty. Otherwise a newline-joined
///   block: a header, one numbered line per category (with its trimmed description, or a
///   default description for an idle category that has none), an idle-usage rule when an
///   idle category exists, and a closing line listing the allowed values.
private func categoriesSection(from descriptors: [LLMCategoryDescriptor]) -> String {
  if descriptors.isEmpty {
    return
      "USER CATEGORIES: No categories configured. Use consistent labels based on the activity story."
  }

  let numberedEntries = descriptors.enumerated().map { position, category -> String in
    let trimmedDescription =
      category.description?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    // Idle categories without a description get a default explanation.
    let explanation: String
    if trimmedDescription.isEmpty && category.isIdle {
      explanation = "Use when the user is idle for most of this period."
    } else {
      explanation = trimmedDescription
    }
    let tail = explanation.isEmpty ? "" : " — \(explanation)"
    return "\(position + 1). \"\(category.name)\"\(tail)"
  }

  var output = ["USER CATEGORIES (choose exactly one label):"] + numberedEntries

  if let idleCategory = descriptors.first(where: { $0.isIdle }) {
    output.append(
      "Only use \"\(idleCategory.name)\" when the user is idle for more than half of the timeframe. Otherwise pick the closest non-idle label."
    )
  }

  let quotedNames = descriptors.map { "\"\($0.name)\"" }.joined(separator: ", ")
  output.append("Return the category exactly as written. Allowed values: [\(quotedNames)].")
  return output.joined(separator: "\n")
}
/// Returns copies of `cards` whose `category` has been passed through `normalizeCategory`.
/// All other fields are carried over unchanged and the input order is preserved.
private func normalizeCards(_ cards: [ActivityCardData], descriptors: [LLMCategoryDescriptor])
  -> [ActivityCardData]
{
  var result: [ActivityCardData] = []
  result.reserveCapacity(cards.count)

  for original in cards {
    let resolvedCategory = normalizeCategory(original.category, descriptors: descriptors)
    let rebuilt = ActivityCardData(
      startTime: original.startTime,
      endTime: original.endTime,
      category: resolvedCategory,
      subcategory: original.subcategory,
      title: original.title,
      summary: original.summary,
      detailedSummary: original.detailedSummary,
      distractions: original.distractions,
      appSites: original.appSites
    )
    result.append(rebuilt)
  }

  return result
}
Check this FIRST to identify which app is being used. Do NOT guess — read the actual name from the menu bar. If you can't read it clearly, describe it generically (e.g., "code editor," "browser," "messaging app") rather than guessing a specific product name. Common code editors like Cursor, VS Code, Xcode, and Zed all look similar but have different names in the menu bar. - For each segment, ask yourself: - "What EXACTLY did they do? What SPECIFIC things can I see?" - Capture: - - Exact app/site names visible (check menu bar for app name) - - Exact file names, URLs, page titles - - Exact usernames, search queries, messages - - Exact numbers, stats, prices shown - Bad: "Checked email" - Good: "Gmail: Read email from boss@company.com 'RE: Budget approval' - replied 'Looks good'" - Bad: "Browsing Twitter" - Good: "Twitter/X: Scrolled feed - viewed posts by @pmarca about AI, @sama thread on GPT-5 (12 tweets)" - Bad: "Working on code" - Good: "Editing StorageManager.swift in [exact app name from menu bar] - fixed type error on line 47, changed String to String?" 
- Segments: - - 3-8 segments total - - You may use 1 segment only if the user appears idle for most of the recording - - Group by GOAL not app (IDE + Terminal + Browser for the same task = 1 segment) - - Do not create gaps; cover the full timeline - Return ONLY JSON in this format: - [ - { - "startTimestamp": "MM:SS", - "endTimestamp": "MM:SS", - "description": "1-3 sentences with specific details" - } - ] - """ + let finalTranscriptionPrompt = LLMPromptTemplates.screenRecordingTranscriptionPrompt( + durationString: durationString, + schema: LLMSchema.screenRecordingTranscriptionSchema + ) // UNIFIED RETRY LOOP - Handles ALL errors comprehensively let maxRetries = 3 @@ -323,53 +295,29 @@ final class GeminiDirectProvider { attempt: attempt + 1 ) - let videoTranscripts = try parseTranscripts(response) - - // Convert video transcripts to observations with proper Unix timestamps - // Timestamps from Gemini are in compressed video time, so we expand them - // by the compression factor to get real-world timestamps. - var hasValidationErrors = false - let observations = videoTranscripts.compactMap { chunk -> Observation? in - let compressedStartSeconds = parseVideoTimestamp(chunk.startTimestamp) - let compressedEndSeconds = parseVideoTimestamp(chunk.endTimestamp) - - // Validate timestamps are within compressed video duration (with small tolerance) - let tolerance: TimeInterval = 10.0 // 10 seconds tolerance in compressed time - if Double(compressedStartSeconds) < -tolerance - || Double(compressedEndSeconds) > videoDuration + tolerance - { - print( - "āŒ VALIDATION ERROR: Observation timestamps (\(chunk.startTimestamp) - \(chunk.endTimestamp)) exceed video duration \(durationString)!" 
- ) - hasValidationErrors = true - return nil - } - - // Expand timestamps by compression factor to get real-world time - let realStartSeconds = TimeInterval(compressedStartSeconds) * compressionFactor - let realEndSeconds = TimeInterval(compressedEndSeconds) * compressionFactor - - let startDate = batchStartTime.addingTimeInterval(realStartSeconds) - let endDate = batchStartTime.addingTimeInterval(realEndSeconds) - - print( - "šŸ“ Timestamp expansion: \(chunk.startTimestamp)-\(chunk.endTimestamp) → \(Int(realStartSeconds))s-\(Int(realEndSeconds))s real" - ) - - return Observation( - id: nil, - batchId: 0, // Will be set when saved - startTs: Int(startDate.timeIntervalSince1970), - endTs: Int(endDate.timeIntervalSince1970), - observation: chunk.description, - metadata: nil, - llmModel: usedModel, - createdAt: Date() - ) - } + let videoTranscripts = try LLMTranscriptUtilities.decodeTranscriptChunks( + from: response, allowBracketFallback: false) + + // Convert video transcripts to observations with proper Unix timestamps. + // Timestamps from Gemini are in compressed video time, so we expand them by `compressionFactor`. 
+ let conversion = LLMTranscriptUtilities.observations( + from: videoTranscripts, + batchStartTime: batchStartTime, + observationBatchId: 0, // Will be set when saved + llmModel: usedModel, + compressedVideoDuration: videoDuration, + compressionFactor: compressionFactor, + tolerance: 10.0, + debugPrintExpansion: true + ) + let observations = conversion.observations + let hasValidationErrors = conversion.invalidTimestampCount > 0 // If we had validation errors, throw to trigger retry if hasValidationErrors { + print( + "āŒ VALIDATION ERROR: One or more transcript chunks exceeded video duration \(durationString) (invalidCount=\(conversion.invalidTimestampCount))" + ) AnalyticsService.shared.captureValidationFailure( provider: "gemini", operation: "transcribe", @@ -588,139 +536,20 @@ final class GeminiDirectProvider { encoder.outputFormatting = .prettyPrinted let existingCardsJSON = try encoder.encode(context.existingCards) let existingCardsString = String(data: existingCardsJSON, encoding: .utf8) ?? "[]" - let promptSections = GeminiPromptSections(overrides: GeminiPromptPreferences.load()) + let promptSections = VideoPromptSections(overrides: VideoPromptPreferences.load()) let languageBlock = LLMOutputLanguagePreferences.languageInstruction(forJSON: true) .map { "\n\n\($0)" } ?? "" - let basePrompt = """ - # Timeline Card Generation - - You're writing someone's personal work journal. You'll get raw activity logs — screenshots, app switches, URLs — and your job is to turn them into timeline cards that help this person remember what they actually did. - - The test: when they scan their timeline tomorrow morning, each card should make them go "oh right, that." - - Write as if you ARE the person jotting down notes about their day. Not an analyst writing a report. Not a manager filing a status update. - - --- - - ## Card Structure - - Each card covers one cohesive chunk of activity, roughly 15–60 minutes. - - - Minimum 10 minutes per card. 
If something would be shorter, fold it into the neighboring card that makes the most sense. - - Maximum 60 minutes. If a card runs longer, split it where the focus naturally shifts. - - No gaps or overlaps between cards. If there's a real gap in the source data, preserve it. Otherwise, cards should meet cleanly. - - **When to start a new card:** - 1. What's the main thing happening right now? - 2. Does the next chunk of activity continue that same thing? → Keep extending. - 3. Is there a brief unrelated detour (<5 min)? → Log it as a distraction, keep the card going. - 4. Has the focus genuinely shifted for 10+ minutes? → New card. - - **When to merge with a previous card:** - 1. Is the previous card's main activity the same as what's happening now? (same PR, same feature, same codebase, same article) → Merge. - 2. Did the person just take a 2–5 minute break (X, messages, YouTube) and come back to the same thing? → That's a distraction, not a new card. Merge. - 3. Are two adjacent cards both "scrolling X with occasional work check-ins"? → Merge. The vibe didn't change. - 4. Only start a new card if the CORE INTENT changed for 10+ minutes. - - DEFAULT TO MERGING. Two 15-minute cards about the same work stream should almost never exist. If you're unsure whether to merge or split, merge. - - --- - - \(promptSections.title) - - --- - - \(promptSections.summary) - - --- - - \(promptSections.detailedSummary) - - \(languageBlock) - - --- - - ## Category - - \(categoriesSection(from: context.categories)) - - --- - - ## Distractions - - A distraction is a brief (<5 min) unrelated interruption inside a card. Checking X for 2 minutes while debugging is a distraction. Spending 15 minutes on X is not a distraction — it's either part of the card's theme or it's a new card. - - Don't label related sub-tasks as distractions. Googling an error message while debugging isn't a distraction, it's part of debugging. 
- - --- - - ## App Sites - - Identify the main app or website for each card. - - - primary: the main app used in the card (canonical domain, lowercase, no protocol). - - secondary: another meaningful app used, or the enclosing app (e.g., browser). Omit if there isn't a clear one. - - Be specific: docs.google.com not google.com, mail.google.com not google.com. - - Common mappings: - - Figma → figma.com - - Notion → notion.so - - Google Docs → docs.google.com - - Gmail → mail.google.com - - VS Code → code.visualstudio.com - - Xcode → developer.apple.com/xcode - - Twitter/X → x.com - - Zoom → zoom.us - - ChatGPT → chatgpt.com - - --- - - ## Continuity Rules - - Your output cards must cover the same total time range as the previous cards plus any new observations. Think of previous cards as a draft you're revising and extending, not locked history. - - - Don't drop time segments that were previously covered. - - If new observations extend beyond the previous range, add cards to cover the new time. - - Preserve genuine gaps in the source data. - - Before generating output, review the previous cards and ask: - - Could any two adjacent previous cards be the same activity session? - - Does your first new card continue the last previous card's work? - If yes to either, merge them in your output. 
- - INPUTS: - Previous cards: \(existingCardsString) - New observations: \(transcriptText) - Return ONLY a JSON array with this EXACT structure: - - [ - { - "startTime": "1:12 AM", - "endTime": "1:30 AM", - "category": "", - "subcategory": "", - "title": "", - "summary": "", - "detailedSummary": "", - "distractions": [ - { - "startTime": "1:15 AM", - "endTime": "1:18 AM", - "title": "", - "summary": "" - } - ], - "appSites": { - "primary": "", - "secondary": " - } - } - ] - """ + let basePrompt = LLMPromptTemplates.activityCardsPrompt( + existingCardsString: existingCardsString, + transcriptText: transcriptText, + categoriesSection: categoriesSection(from: context.categories), + promptSections: promptSections, + languageBlock: languageBlock, + schema: LLMSchema.activityCardsSchema, + ) // UNIFIED RETRY LOOP - Handles ALL errors comprehensively let maxRetries = 4 @@ -1171,26 +1000,14 @@ final class GeminiDirectProvider { fileURI: String, mimeType: String, prompt: String, batchId: Int64?, groupId: String, model: GeminiModel, attempt: Int ) async throws -> (String, String) { - let transcriptionSchema: [String: Any] = [ - "type": "ARRAY", - "items": [ - "type": "OBJECT", - "properties": [ - "startTimestamp": ["type": "STRING"], - "endTimestamp": ["type": "STRING"], - "description": ["type": "STRING"], - ], - "required": ["startTimestamp", "endTimestamp", "description"], - "propertyOrdering": ["startTimestamp", "endTimestamp", "description"], - ], - ] - + let transcriptionSchemaObject = try! 
JSONSerialization.jsonObject( + with: Data(LLMSchema.screenRecordingTranscriptionSchema.utf8)) let generationConfig: [String: Any] = [ "temperature": 0.3, "maxOutputTokens": 65536, "mediaResolution": "MEDIA_RESOLUTION_HIGH", "responseMimeType": "application/json", - "responseSchema": transcriptionSchema, + "responseJsonSchema": transcriptionSchemaObject, ] let requestBody: [String: Any] = [ @@ -1476,34 +1293,6 @@ final class GeminiDirectProvider { } } - // Temporary struct for parsing Gemini response - private struct VideoTranscriptChunk: Codable { - let startTimestamp: String // MM:SS - let endTimestamp: String // MM:SS - let description: String - } - - private func parseTranscripts(_ response: String) throws -> [VideoTranscriptChunk] { - guard let data = response.data(using: .utf8) else { - print( - "šŸ”Ž GEMINI DEBUG: parseTranscripts received non-UTF8 or empty response: \(truncate(response, max: 400))" - ) - throw NSError( - domain: "GeminiError", code: 8, - userInfo: [NSLocalizedDescriptionKey: "Invalid response encoding"]) - } - do { - let transcripts = try JSONDecoder().decode([VideoTranscriptChunk].self, from: data) - return transcripts - } catch { - let snippet = truncate(String(data: data, encoding: .utf8) ?? 
"", max: 1200) - print( - "šŸ”Ž GEMINI DEBUG: parseTranscripts JSON decode failed: \(error.localizedDescription) bodySnippet=\(snippet)" - ) - throw error - } - } - private func geminiCardsRequest( prompt: String, batchId: Int64?, groupId: String, model: GeminiModel, attempt: Int ) async throws -> String { @@ -1865,267 +1654,27 @@ final class GeminiDirectProvider { // (no local logging helpers needed; centralized via LLMLogger) - private struct TimeRange { - let start: Double // minutes from midnight - let end: Double - } - private func timeToMinutes(_ timeStr: String) -> Double { - // Handle both "10:30 AM" and "05:30" formats - if timeStr.contains("AM") || timeStr.contains("PM") { - // Clock format - parse as date - let formatter = DateFormatter() - formatter.dateFormat = "h:mm a" - formatter.locale = Locale(identifier: "en_US_POSIX") - - if let date = formatter.date(from: timeStr) { - let calendar = Calendar.current - let components = calendar.dateComponents([.hour, .minute], from: date) - return Double((components.hour ?? 0) * 60 + (components.minute ?? 0)) - } - return 0 - } else { - // MM:SS format - convert to minutes - let seconds = parseVideoTimestamp(timeStr) - return Double(seconds) / 60.0 - } - } - - private func mergeOverlappingRanges(_ ranges: [TimeRange]) -> [TimeRange] { - guard !ranges.isEmpty else { return [] } - - // Sort by start time - let sorted = ranges.sorted { $0.start < $1.start } - var merged: [TimeRange] = [] - - for range in sorted { - if merged.isEmpty || range.start > merged.last!.end + 1 { - // No overlap - add as new range - merged.append(range) - } else { - // Overlap or adjacent - merge with last range - let last = merged.removeLast() - merged.append(TimeRange(start: last.start, end: max(last.end, range.end))) - } - } - - return merged + LLMTimelineCardValidation.timeToMinutes(timeStr) } private func validateTimeCoverage(existingCards: [ActivityCardData], newCards: [ActivityCardData]) -> (isValid: Bool, error: String?) 
{ - guard !existingCards.isEmpty else { - return (true, nil) - } - - // Extract time ranges from input cards - var inputRanges: [TimeRange] = [] - for card in existingCards { - let startMin = timeToMinutes(card.startTime) - var endMin = timeToMinutes(card.endTime) - if endMin < startMin { // Handle day rollover - endMin += 24 * 60 - } - inputRanges.append(TimeRange(start: startMin, end: endMin)) - } - - // Merge overlapping/adjacent ranges - let mergedInputRanges = mergeOverlappingRanges(inputRanges) - - // Extract time ranges from output cards (Fix #1: Skip zero or negative duration cards) - var outputRanges: [TimeRange] = [] - for card in newCards { - let startMin = timeToMinutes(card.startTime) - var endMin = timeToMinutes(card.endTime) - if endMin < startMin { // Handle day rollover - endMin += 24 * 60 - } - // Skip zero or very short duration cards (less than 0.1 minutes = 6 seconds) - guard endMin - startMin >= 0.1 else { - continue - } - outputRanges.append(TimeRange(start: startMin, end: endMin)) - } - - // Check coverage with 3-minute flexibility - let flexibility = 3.0 // minutes - var uncoveredSegments: [(start: Double, end: Double)] = [] - - for inputRange in mergedInputRanges { - // Check if this input range is covered by output ranges - var coveredStart = inputRange.start - var safetyCounter = 10000 // Fix #3: Safety cap to prevent infinite loops - - while coveredStart < inputRange.end && safetyCounter > 0 { - safetyCounter -= 1 - // Find an output range that covers this point - var foundCoverage = false - - for outputRange in outputRanges { - // Check if this output range covers the current point (with flexibility) - if outputRange.start - flexibility <= coveredStart - && coveredStart <= outputRange.end + flexibility - { - // Move coveredStart to the end of this output range (Fix #2: Force progress) - let newCoveredStart = outputRange.end - // Ensure we make at least minimal progress (0.01 minutes = 0.6 seconds) - coveredStart = max(coveredStart + 
0.01, newCoveredStart) - foundCoverage = true - break - } - } - - if !foundCoverage { - // Find the next covered point - var nextCovered = inputRange.end - for outputRange in outputRanges { - if outputRange.start > coveredStart && outputRange.start < nextCovered { - nextCovered = outputRange.start - } - } - - // Add uncovered segment - if nextCovered > coveredStart { - uncoveredSegments.append((start: coveredStart, end: min(nextCovered, inputRange.end))) - coveredStart = nextCovered - } else { - // No more coverage found, add remaining segment and break - uncoveredSegments.append((start: coveredStart, end: inputRange.end)) - break - } - } - } - - // Check if safety counter was exhausted - if safetyCounter == 0 { - return ( - false, - "Time coverage validation loop exceeded safety limit - possible infinite loop detected" - ) - } - } - - // Check if uncovered segments are significant - if !uncoveredSegments.isEmpty { - var uncoveredDesc: [String] = [] - for segment in uncoveredSegments { - let duration = segment.end - segment.start - if duration > flexibility { // Only report significant gaps - let startTime = minutesToTimeString(segment.start) - let endTime = minutesToTimeString(segment.end) - uncoveredDesc.append("\(startTime)-\(endTime) (\(Int(duration)) min)") - } - } - - if !uncoveredDesc.isEmpty { - // Build detailed error message with input/output cards - var errorMsg = - "Missing coverage for time segments: \(uncoveredDesc.joined(separator: ", "))" - errorMsg += "\n\nšŸ“„ INPUT CARDS:" - for (i, card) in existingCards.enumerated() { - errorMsg += "\n \(i+1). \(card.startTime) - \(card.endTime): \(card.title)" - } - errorMsg += "\n\nšŸ“¤ OUTPUT CARDS:" - for (i, card) in newCards.enumerated() { - errorMsg += "\n \(i+1). 
\(card.startTime) - \(card.endTime): \(card.title)" - } - - return (false, errorMsg) - } - } - - return (true, nil) + LLMTimelineCardValidation.validateTimeCoverage(existingCards: existingCards, newCards: newCards) } private func validateTimeline(_ cards: [ActivityCardData]) -> (isValid: Bool, error: String?) { - for (index, card) in cards.enumerated() { - let startTime = card.startTime - let endTime = card.endTime - - var durationMinutes: Double = 0 - - // Check if times are in clock format (contains AM/PM) - if startTime.contains("AM") || startTime.contains("PM") { - let formatter = DateFormatter() - formatter.dateFormat = "h:mm a" - formatter.locale = Locale(identifier: "en_US_POSIX") - - if let startDate = formatter.date(from: startTime), - let endDate = formatter.date(from: endTime) - { - - var adjustedEndDate = endDate - // Handle day rollover (e.g., 11:30 PM to 12:30 AM) - if endDate < startDate { - adjustedEndDate = - Calendar.current.date(byAdding: .day, value: 1, to: endDate) ?? endDate - } - - durationMinutes = adjustedEndDate.timeIntervalSince(startDate) / 60.0 - } else { - // Failed to parse clock times - durationMinutes = 0 - } - } else { - // Parse MM:SS format - let startSeconds = parseVideoTimestamp(startTime) - let endSeconds = parseVideoTimestamp(endTime) - durationMinutes = Double(endSeconds - startSeconds) / 60.0 - } - - // Check if card is too short (except for last card) - if durationMinutes < 10 && index < cards.count - 1 { - return ( - false, - "Card \(index + 1) '\(card.title)' is only \(String(format: "%.1f", durationMinutes)) minutes long" - ) - } - } - - return (true, nil) + LLMTimelineCardValidation.validateTimeline(cards) } private func minutesToTimeString(_ minutes: Double) -> String { - let hours = (Int(minutes) / 60) % 24 // Handle > 24 hours - let mins = Int(minutes) % 60 - let period = hours < 12 ? 
"AM" : "PM" - var displayHour = hours % 12 - if displayHour == 0 { - displayHour = 12 - } - return String(format: "%d:%02d %@", displayHour, mins, period) - } - - private func parseVideoTimestamp(_ timestamp: String) -> Int { - let components = timestamp.components(separatedBy: ":") - - if components.count == 2 { - // MM:SS format - let minutes = Int(components[0]) ?? 0 - let seconds = Int(components[1]) ?? 0 - return minutes * 60 + seconds - } else if components.count == 3 { - // HH:MM:SS format - let hours = Int(components[0]) ?? 0 - let minutes = Int(components[1]) ?? 0 - let seconds = Int(components[2]) ?? 0 - return hours * 3600 + minutes * 60 + seconds - } else { - // Invalid format, return 0 - print("Warning: Invalid video timestamp format: \(timestamp)") - return 0 - } + LLMTimelineCardValidation.minutesToTimeString(minutes) } // Helper function to format timestamps private func formatTimestampForPrompt(_ unixTime: Int) -> String { - let date = Date(timeIntervalSince1970: TimeInterval(unixTime)) - let formatter = DateFormatter() - formatter.dateFormat = "h:mm a" - formatter.locale = Locale(identifier: "en_US_POSIX") - formatter.timeZone = TimeZone.current - return formatter.string(from: date) + LLMTimelineCardValidation.formatTimestampForPrompt(unixTime) } // MARK: - Text Generation @@ -2135,9 +1684,13 @@ final class GeminiDirectProvider { { let callStart = Date() + let activityCardsSchemaObject = try? 
JSONSerialization.jsonObject( + with: Data(LLMSchema.activityCardsSchema.utf8)) let generationConfig: [String: Any] = [ "temperature": 0.7, "maxOutputTokens": maxOutputTokens, + "responseMimeType": "application/json", + "responseJsonSchema": activityCardsSchemaObject, ] let requestBody: [String: Any] = [ diff --git a/Dayflow/Dayflow/Core/AI/GeminiPromptPreferences.swift b/Dayflow/Dayflow/Core/AI/GeminiPromptPreferences.swift index b416a32a..1f8d2d2c 100644 --- a/Dayflow/Dayflow/Core/AI/GeminiPromptPreferences.swift +++ b/Dayflow/Dayflow/Core/AI/GeminiPromptPreferences.swift @@ -1,6 +1,6 @@ import Foundation -struct GeminiPromptOverrides: Codable, Equatable { +struct VideoPromptOverrides: Codable, Equatable { var titleBlock: String? var summaryBlock: String? var detailedBlock: String? @@ -14,21 +14,21 @@ struct GeminiPromptOverrides: Codable, Equatable { } } -enum GeminiPromptPreferences { +enum VideoPromptPreferences { private static let overridesKey = "geminiPromptOverrides" private static let store = UserDefaults.standard - static func load() -> GeminiPromptOverrides { + static func load() -> VideoPromptOverrides { guard let data = store.data(forKey: overridesKey) else { - return GeminiPromptOverrides() + return VideoPromptOverrides() } - guard let overrides = try? JSONDecoder().decode(GeminiPromptOverrides.self, from: data) else { - return GeminiPromptOverrides() + guard let overrides = try? JSONDecoder().decode(VideoPromptOverrides.self, from: data) else { + return VideoPromptOverrides() } return overrides } - static func save(_ overrides: GeminiPromptOverrides) { + static func save(_ overrides: VideoPromptOverrides) { guard let data = try? 
/// Shared prompt templates used by multiple LLM providers.
///
/// When prompts must remain *exactly* identical between providers, keep them here and call these helpers.
/// Every helper is a pure string builder: it interpolates its arguments into a fixed template and
/// performs no I/O.
enum LLMPromptTemplates {
  /// Builds the prompt that asks a model to transcribe a screen recording into timestamped segments.
  ///
  /// - Parameters:
  ///   - durationString: Length of the video, interpolated verbatim as the upper bound for timestamps.
  ///   - schema: JSON schema text appended verbatim so the model knows the expected output shape.
  /// - Returns: The complete prompt text.
  static func screenRecordingTranscriptionPrompt(durationString: String, schema: String) -> String {
    // `\u{F8FF}` is the Apple-logo glyph; it is written as an escape so the literal survives
    // editors and tools that mangle private-use characters.
    """
    Screen Recording Transcription (Reconstruct Mode)
    Watch this screen recording and create an activity log detailed enough that someone could reconstruct the session.
    CRITICAL: This video is exactly \(durationString) long. ALL timestamps must be within 00:00 to \(durationString). No gaps.
    Identifying the active app: On macOS, the app name is always shown in the top-left corner of the screen, right next to the Apple (\u{F8FF}) menu. Check this FIRST to identify which app is being used. Do NOT guess — read the actual name from the menu bar. If you can't read it clearly, describe it generically (e.g., "code editor," "browser," "messaging app") rather than guessing a specific product name. Common code editors like Cursor, VS Code, Xcode, and Zed all look similar but have different names in the menu bar.
    For each segment, ask yourself:
    "What EXACTLY did they do? What SPECIFIC things can I see?"
    Capture:
    - Exact app/site names visible (check menu bar for app name)
    - Exact file names, URLs, page titles
    - Exact usernames, search queries, messages
    - Exact numbers, stats, prices shown
    Bad: "Checked email"
    Good: "Gmail: Read email from boss@company.com 'RE: Budget approval' - replied 'Looks good'"
    Bad: "Browsing Twitter"
    Good: "Twitter/X: Scrolled feed - viewed posts by @pmarca about AI, @sama thread on GPT-5 (12 tweets)"
    Bad: "Working on code"
    Good: "Editing StorageManager.swift in [exact app name from menu bar] - fixed type error on line 47, changed String to String?"
    Segments:
    - 3-8 segments total
    - You may use 1 segment only if the user appears idle for most of the recording
    - Group by GOAL not app (IDE + Terminal + Browser for the same task = 1 segment)
    - Do not create gaps; cover the full timeline

    Return a JSON array that follows the schema: \(schema)
    """
  }

  /// Builds the prompt that asks a model to turn activity observations into timeline cards.
  ///
  /// - Parameters:
  ///   - existingCardsString: Previously generated cards, already serialized by the caller.
  ///   - transcriptText: New observations to fold into the timeline.
  ///   - categoriesSection: Pre-rendered category instructions, inserted under "## Category".
  ///   - promptSections: Title / summary / detailed-summary instruction blocks.
  ///   - languageBlock: Optional output-language instruction; pass an empty string for none.
  ///   - schema: JSON schema text appended verbatim so the model knows the expected output shape.
  /// - Returns: The complete prompt text.
  // No trailing comma after the last parameter: that syntax is only accepted from Swift 6.1 on.
  static func activityCardsPrompt(
    existingCardsString: String,
    transcriptText: String,
    categoriesSection: String,
    promptSections: VideoPromptSections,
    languageBlock: String,
    schema: String
  ) -> String {
    """
    # Timeline Card Generation

    You're writing someone's personal work journal. You'll get raw activity logs — screenshots, app switches, URLs — and your job is to turn them into timeline cards that help this person remember what they actually did.

    The test: when they scan their timeline tomorrow morning, each card should make them go "oh right, that."

    Write as if you ARE the person jotting down notes about their day. Not an analyst writing a report. Not a manager filing a status update.

    ---

    ## Card Structure

    Each card covers one cohesive chunk of activity, roughly 15–60 minutes.

    - Minimum 10 minutes per card. If something would be shorter, fold it into the neighboring card that makes the most sense.
    - Maximum 60 minutes. If a card runs longer, split it where the focus naturally shifts.
    - No gaps or overlaps between cards. If there's a real gap in the source data, preserve it. Otherwise, cards should meet cleanly.

    **When to start a new card:**
    1. What's the main thing happening right now?
    2. Does the next chunk of activity continue that same thing? → Keep extending.
    3. Is there a brief unrelated detour (<5 min)? → Log it as a distraction, keep the card going.
    4. Has the focus genuinely shifted for 10+ minutes? → New card.

    **When to merge with a previous card:**
    1. Is the previous card's main activity the same as what's happening now? (same PR, same feature, same codebase, same article) → Merge.
    2. Did the person just take a 2–5 minute break (X, messages, YouTube) and come back to the same thing? → That's a distraction, not a new card. Merge.
    3. Are two adjacent cards both "scrolling X with occasional work check-ins"? → Merge. The vibe didn't change.
    4. Only start a new card if the CORE INTENT changed for 10+ minutes.

    DEFAULT TO MERGING. Two 15-minute cards about the same work stream should almost never exist. If you're unsure whether to merge or split, merge.


    ---

    \(promptSections.title)

    ---

    \(promptSections.summary)

    ---

    \(promptSections.detailedSummary)

    \(languageBlock)

    ---

    ## Category

    \(categoriesSection)

    ---

    ## Distractions

    A distraction is a brief (<5 min) unrelated interruption inside a card. Checking X for 2 minutes while debugging is a distraction. Spending 15 minutes on X is not a distraction — it's either part of the card's theme or it's a new card.

    Don't label related sub-tasks as distractions. Googling an error message while debugging isn't a distraction, it's part of debugging.

    ---

    ## App Sites

    Identify the main app or website for each card.

    - primary: the main app used in the card (canonical domain, lowercase, no protocol).
    - secondary: another meaningful app used, or the enclosing app (e.g., browser). Omit if there isn't a clear one.

    Be specific: docs.google.com not google.com, mail.google.com not google.com.

    Common mappings:
    - Figma → figma.com
    - Notion → notion.so
    - Google Docs → docs.google.com
    - Gmail → mail.google.com
    - VS Code → code.visualstudio.com
    - Xcode → developer.apple.com/xcode
    - Twitter/X → x.com
    - Zoom → zoom.us
    - ChatGPT → chatgpt.com

    ---

    ## Continuity Rules

    Your output cards must cover the same total time range as the previous cards plus any new observations. Think of previous cards as a draft you're revising and extending, not locked history.

    - Don't drop time segments that were previously covered.
    - If new observations extend beyond the previous range, add cards to cover the new time.
    - Preserve genuine gaps in the source data.


    Before generating output, review the previous cards and ask:
    - Could any two adjacent previous cards be the same activity session?
    - Does your first new card continue the last previous card's work?
    If yes to either, merge them in your output.

    INPUTS:
    Previous cards: \(existingCardsString)
    New observations: \(transcriptText)

    Return a JSON array of activity cards that follows the schema: \(schema)
    """
  }
}
+ }, + "distractions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "startTime": { + "type": "string", + "description": "The start time of the distraction in 'h:mm a' format." + }, + "endTime": { + "type": "string", + "description": "The end time of the distraction in 'h:mm a' format." + }, + "title": { + "type": "string", + "description": "A title for the distraction." + }, + "summary": { + "type": "string", + "description": "A summary of the distraction." + } + }, + "required": ["startTime", "endTime", "title", "summary"] + } + }, + "appSites": { + "type": "object", + "properties": { + "primary": { + "type": "string", + "description": "The primary app or website used." + }, + "secondary": { + "type": "string", + "description": "The secondary app or website used." + } + }, + "required": ["primary"] + } + }, + "required": ["startTime", "endTime", "category", "title", "summary", "detailedSummary", "appSites"] + } + } + """ +} diff --git a/Dayflow/Dayflow/Core/AI/LLMService.swift b/Dayflow/Dayflow/Core/AI/LLMService.swift index bbf6efd7..360d22f3 100644 --- a/Dayflow/Dayflow/Core/AI/LLMService.swift +++ b/Dayflow/Dayflow/Core/AI/LLMService.swift @@ -137,6 +137,14 @@ final class LLMService: LLMServicing { OllamaProvider(endpoint: endpoint) } + private func makeDoubaoProvider(endpoint: String, modelId: String) -> DoubaoArkProvider? { + if let apiKey = KeychainManager.shared.retrieve(for: "doubao"), !apiKey.isEmpty { + return DoubaoArkProvider(apiKey: apiKey, endpoint: endpoint, modelId: modelId) + } + print("āŒ [LLMService] Failed to retrieve Doubao API key from Keychain") + return nil + } + private func makeChatCLIProvider(preferredToolOverride: ChatCLITool? 
= nil) -> ChatCLIProvider { let tool: ChatCLITool if let preferredToolOverride { @@ -259,6 +267,29 @@ final class LLMService: LLMServicing { generateActivityCards: provider.generateActivityCards ), fallbackState: nil ) + + case .doubao: + let endpoint: String + if case .doubaoArk(let savedEndpoint) = providerType { + endpoint = savedEndpoint + } else { + endpoint = DoubaoPreferences.defaultBaseURL + } + + let modelId = + (UserDefaults.standard.string(forKey: DoubaoPreferences.modelIdDefaultsKey) ?? "") + .trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedModelId = modelId.isEmpty ? DoubaoPreferences.defaultModelId : modelId + + guard let provider = makeDoubaoProvider(endpoint: endpoint, modelId: resolvedModelId) else { + throw noProviderError() + } + return ( + actions: BatchProviderActions( + transcribeScreenshots: provider.transcribeScreenshots, + generateActivityCards: provider.generateActivityCards + ), fallbackState: nil + ) } } @@ -531,6 +562,19 @@ final class LLMService: LLMServicing { }, generateTextStreaming: provider.generateTextStreaming ) + + case .doubaoArk(let endpoint): + let modelId = + (UserDefaults.standard.string(forKey: DoubaoPreferences.modelIdDefaultsKey) ?? "") + .trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedModelId = modelId.isEmpty ? 
DoubaoPreferences.defaultModelId : modelId + guard let provider = makeDoubaoProvider(endpoint: endpoint, modelId: resolvedModelId) else { + throw noProviderError() + } + return TextProviderActions( + generateText: provider.generateText, + generateTextStreaming: nil + ) } } diff --git a/Dayflow/Dayflow/Core/AI/LLMTypes.swift b/Dayflow/Dayflow/Core/AI/LLMTypes.swift index 02f997ff..03616a72 100644 --- a/Dayflow/Dayflow/Core/AI/LLMTypes.swift +++ b/Dayflow/Dayflow/Core/AI/LLMTypes.swift @@ -96,6 +96,8 @@ enum LLMProviderType: Codable { case dayflowBackend(endpoint: String = "https://web-production-f3361.up.railway.app") case ollamaLocal(endpoint: String = "http://localhost:11434") case chatGPTClaude + /// Volcengine Ark / Doubao (OpenAI-compatible Chat Completions API) + case doubaoArk(endpoint: String = "https://ark.cn-beijing.volces.com/api/v3") private static let providerDefaultsKey = "llmProviderType" private static let selectedProviderDefaultsKey = "selectedLLMProvider" @@ -134,6 +136,8 @@ enum LLMProviderType: Codable { return "ollama" case .chatGPTClaude: return "chatgpt_claude" + case .doubaoArk: + return "doubao" } } @@ -171,6 +175,8 @@ enum LLMProviderType: Codable { return .chatGPTClaude case "chatgpt_claude": return .chatGPTClaude + case "doubao": + return .doubaoArk() default: return nil } @@ -182,6 +188,7 @@ enum LLMProviderID: String, Codable, CaseIterable { case dayflow case ollama case chatGPTClaude = "chatgpt_claude" + case doubao var analyticsName: String { switch self { @@ -193,6 +200,8 @@ enum LLMProviderID: String, Codable, CaseIterable { return "ollama" case .chatGPTClaude: return "chat_cli" + case .doubao: + return "doubao" } } @@ -206,6 +215,8 @@ enum LLMProviderID: String, Codable, CaseIterable { return .ollama case .chatGPTClaude: return .chatGPTClaude + case .doubaoArk: + return .doubao } } @@ -219,10 +230,20 @@ enum LLMProviderID: String, Codable, CaseIterable { return "local" case .chatGPTClaude: return chatTool == .claude ? 
/// Shared utilities for parsing timestamps emitted by LLMs during video/timelapse transcription.
///
/// NOTE: These helpers are intentionally lightweight and avoid shared DateFormatter instances
/// because DateFormatter is not thread-safe.
enum LLMVideoTimestampUtilities {
  /// Parses either `HH:MM:SS` or `MM:SS` into total seconds.
  ///
  /// Surrounding whitespace and newlines are ignored. Parsing is deliberately lenient because
  /// the input is model output:
  /// - a string that does not split into exactly two or three `:`-separated components yields `0`;
  /// - a component that is not a valid integer contributes `0` (so `"1:xx"` yields `60`).
  ///
  /// - Parameter timestamp: Timestamp text such as `"02:05"` or `"01:02:03"`.
  /// - Returns: The total number of seconds.
  static func parseVideoTimestamp(_ timestamp: String) -> Int {
    let fields =
      timestamp
      .trimmingCharacters(in: .whitespacesAndNewlines)
      .components(separatedBy: ":")

    guard fields.count == 2 || fields.count == 3 else { return 0 }

    // Horner-style fold: each step shifts the running total up one base-60 place, which yields
    // m*60 + s for two fields and h*3600 + m*60 + s for three.
    return fields.reduce(0) { total, field in
      total * 60 + (Int(field) ?? 0)
    }
  }

  /// Formats a duration in seconds as zero-padded `HH:MM:SS`, rounding to the nearest second.
  ///
  /// - Parameter seconds: Duration to format.
  /// - Returns: The formatted string. Values that cannot be represented as a non-negative `Int`
  ///   (NaN, ±infinity, out-of-range magnitudes, negatives) yield `"00:00:00"` instead of
  ///   trapping or printing negative fields.
  static func formatSecondsHHMMSS(_ seconds: TimeInterval) -> String {
    // `Int(_:)` traps on NaN/infinity/overflow; `Int(exactly:)` returns nil for those inputs.
    guard let rounded = Int(exactly: seconds.rounded()), rounded > 0 else {
      return "00:00:00"
    }
    let hours = rounded / 3600
    let minutes = (rounded % 3600) / 60
    let secs = rounded % 60
    return String(format: "%02d:%02d:%02d", hours, minutes, secs)
  }
}
"AM" : "PM" + var displayHour = hours % 12 + if displayHour == 0 { displayHour = 12 } + return String(format: "%d:%02d %@", displayHour, mins, period) + } + + private static func mergeOverlappingRanges(_ ranges: [TimeRange]) -> [TimeRange] { + guard !ranges.isEmpty else { return [] } + let sorted = ranges.sorted { $0.start < $1.start } + var merged: [TimeRange] = [] + for range in sorted { + if merged.isEmpty || range.start > merged.last!.end + 1 { + merged.append(range) + } else { + let last = merged.removeLast() + merged.append(TimeRange(start: last.start, end: max(last.end, range.end))) + } + } + return merged + } + + static func validateTimeCoverage(existingCards: [ActivityCardData], newCards: [ActivityCardData]) + -> (isValid: Bool, error: String?) + { + guard !existingCards.isEmpty else { return (true, nil) } + + var inputRanges: [TimeRange] = [] + for card in existingCards { + let startMin = timeToMinutes(card.startTime) + var endMin = timeToMinutes(card.endTime) + if endMin < startMin { endMin += 24 * 60 } + inputRanges.append(TimeRange(start: startMin, end: endMin)) + } + let mergedInputRanges = mergeOverlappingRanges(inputRanges) + + var outputRanges: [TimeRange] = [] + for card in newCards { + let startMin = timeToMinutes(card.startTime) + var endMin = timeToMinutes(card.endTime) + if endMin < startMin { endMin += 24 * 60 } + guard endMin - startMin >= 0.1 else { continue } + outputRanges.append(TimeRange(start: startMin, end: endMin)) + } + + let flexibility = 3.0 + var uncoveredSegments: [(start: Double, end: Double)] = [] + + for inputRange in mergedInputRanges { + var coveredStart = inputRange.start + var safetyCounter = 10000 + + while coveredStart < inputRange.end && safetyCounter > 0 { + safetyCounter -= 1 + var foundCoverage = false + + for outputRange in outputRanges { + if outputRange.start - flexibility <= coveredStart + && coveredStart <= outputRange.end + flexibility + { + let newCoveredStart = outputRange.end + coveredStart = 
max(coveredStart + 0.01, newCoveredStart) + foundCoverage = true + break + } + } + + if !foundCoverage { + var nextCovered = inputRange.end + for outputRange in outputRanges { + if outputRange.start > coveredStart && outputRange.start < nextCovered { + nextCovered = outputRange.start + } + } + if nextCovered > coveredStart { + uncoveredSegments.append((start: coveredStart, end: min(nextCovered, inputRange.end))) + coveredStart = nextCovered + } else { + uncoveredSegments.append((start: coveredStart, end: inputRange.end)) + break + } + } + } + + if safetyCounter == 0 { + return ( + false, + "Time coverage validation loop exceeded safety limit - possible infinite loop detected" + ) + } + } + + if !uncoveredSegments.isEmpty { + var uncoveredDesc: [String] = [] + for segment in uncoveredSegments { + let duration = segment.end - segment.start + if duration > flexibility { + uncoveredDesc.append( + "\(minutesToTimeString(segment.start))-\(minutesToTimeString(segment.end)) (\(Int(duration)) min)" + ) + } + } + + if !uncoveredDesc.isEmpty { + var errorMsg = + "Missing coverage for time segments: \(uncoveredDesc.joined(separator: ", "))" + errorMsg += "\n\nšŸ“„ INPUT CARDS:" + for (i, card) in existingCards.enumerated() { + errorMsg += "\n \(i+1). \(card.startTime) - \(card.endTime): \(card.title)" + } + errorMsg += "\n\nšŸ“¤ OUTPUT CARDS:" + for (i, card) in newCards.enumerated() { + errorMsg += "\n \(i+1). \(card.startTime) - \(card.endTime): \(card.title)" + } + return (false, errorMsg) + } + } + + return (true, nil) + } + + static func validateTimeline(_ cards: [ActivityCardData]) -> (isValid: Bool, error: String?) 
{ + for (index, card) in cards.enumerated() { + let startTime = card.startTime + let endTime = card.endTime + + var durationMinutes: Double = 0 + + if let startMin = parseTimeHMMA(timeString: startTime), + let endMinRaw = parseTimeHMMA(timeString: endTime) + { + var endMin = endMinRaw + if endMin < startMin { endMin += 24 * 60 } + durationMinutes = Double(endMin - startMin) + } else { + let startSeconds = LLMVideoTimestampUtilities.parseVideoTimestamp(startTime) + let endSeconds = LLMVideoTimestampUtilities.parseVideoTimestamp(endTime) + durationMinutes = Double(endSeconds - startSeconds) / 60.0 + } + + if durationMinutes < 10 && index < cards.count - 1 { + return ( + false, + "Card \(index + 1) '\(card.title)' is only \(String(format: "%.1f", durationMinutes)) minutes long" + ) + } + } + return (true, nil) + } +} + +// MARK: - LLM transcript helpers + +/// Shared utilities for decoding model-generated transcript JSON and converting it into Dayflow observations. +/// +/// Providers differ in transport, but the transcript payload is intentionally normalized to the same `[{ startTimestamp, endTimestamp, description }]` array. +enum LLMTranscriptUtilities { + struct VideoTranscriptChunk: Codable { + let startTimestamp: String + let endTimestamp: String + let description: String + } + + struct ObservationConversionResult { + let observations: [Observation] + /// Count of chunks dropped due to timestamp validation failures. + let invalidTimestampCount: Int + } + + static func decodeTranscriptChunks(from output: String, allowBracketFallback: Bool = false) throws + -> [VideoTranscriptChunk] + { + + if let data = output.data(using: .utf8), + let parsed = try? 
JSONDecoder().decode([VideoTranscriptChunk].self, from: data), + !parsed.isEmpty + { + return parsed + } + + guard allowBracketFallback else { + throw NSError( + domain: "LLMTranscriptUtilities", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "Failed to decode transcript JSON array"] + ) + } + + // Bracket-balance extraction (outside string literals): find the last JSON array in the response. + if let slice = extractLastJSONArraySlice(from: output), + let data = slice.data(using: .utf8), + let parsed = try? JSONDecoder().decode([VideoTranscriptChunk].self, from: data), + !parsed.isEmpty + { + return parsed + } + + throw NSError( + domain: "LLMTranscriptUtilities", + code: 2, + userInfo: [ + NSLocalizedDescriptionKey: + "Failed to decode transcript JSON array (fallback extraction did not help)" + ] + ) + } + + static func observations( + from chunks: [VideoTranscriptChunk], + batchStartTime: Date, + observationBatchId: Int64, + llmModel: String, + compressedVideoDuration: TimeInterval, + compressionFactor: TimeInterval, + tolerance: TimeInterval = 10.0, + debugPrintExpansion: Bool = false + ) -> ObservationConversionResult { + var invalidCount = 0 + let observations: [Observation] = chunks.compactMap { chunk in + let compressedStartSeconds = LLMVideoTimestampUtilities.parseVideoTimestamp( + chunk.startTimestamp) + let compressedEndSeconds = LLMVideoTimestampUtilities.parseVideoTimestamp(chunk.endTimestamp) + + if Double(compressedStartSeconds) < -tolerance + || Double(compressedEndSeconds) > compressedVideoDuration + tolerance + { + invalidCount += 1 + return nil + } + + let realStartSeconds = TimeInterval(compressedStartSeconds) * compressionFactor + let realEndSeconds = TimeInterval(compressedEndSeconds) * compressionFactor + + if debugPrintExpansion { + print( + "šŸ“ Timestamp expansion: \(chunk.startTimestamp)-\(chunk.endTimestamp) → \(Int(realStartSeconds))s-\(Int(realEndSeconds))s real" + ) + } + + let startDate = 
batchStartTime.addingTimeInterval(realStartSeconds) + let endDate = batchStartTime.addingTimeInterval(realEndSeconds) + let trimmed = chunk.description.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + + return Observation( + id: nil, + batchId: observationBatchId, + startTs: Int(startDate.timeIntervalSince1970), + endTs: Int(endDate.timeIntervalSince1970), + observation: trimmed, + metadata: nil, + llmModel: llmModel, + createdAt: Date() + ) + } + + return ObservationConversionResult( + observations: observations, invalidTimestampCount: invalidCount) + } + + private static func extractLastJSONArraySlice(from str: String) -> String? { + let brackets = bracketPositionsOutsideStrings(in: str) + guard let lastClose = brackets.last(where: { $0.char == "]" }) else { return nil } + + var balance = 0 + for entry in brackets.reversed() { + if entry.index > lastClose.index { continue } + if entry.char == "]" { + balance += 1 + } else if entry.char == "[" { + balance -= 1 + if balance == 0 { + let slice = String(str[entry.index...lastClose.index]).trimmingCharacters( + in: .whitespacesAndNewlines) + return slice + } + } + } + + return nil + } + + private struct BracketEntry { + let char: Character + let index: String.Index + } + + private static func bracketPositionsOutsideStrings(in str: String) -> [BracketEntry] { + var result: [BracketEntry] = [] + var inString = false + var escaped = false + + for idx in str.indices { + let ch = str[idx] + + if inString { + if escaped { + escaped = false + } else if ch == "\\" { + escaped = true + } else if ch == "\"" { + inString = false + } + continue + } + + if ch == "\"" { + inString = true + continue + } + + if ch == "[" || ch == "]" { + result.append(BracketEntry(char: ch, index: idx)) + } + } + + return result + } +} diff --git a/Dayflow/Dayflow/System/AnalyticsService.swift b/Dayflow/Dayflow/System/AnalyticsService.swift index 25ea04f0..acf305b0 100644 --- 
a/Dayflow/Dayflow/System/AnalyticsService.swift +++ b/Dayflow/Dayflow/System/AnalyticsService.swift @@ -12,6 +12,7 @@ import AppKit import Foundation +import OSLog import PostHog final class AnalyticsService { @@ -25,6 +26,11 @@ final class AnalyticsService { private let throttleLock = NSLock() private var throttles: [String: Date] = [:] + private let localLogger = Logger( + subsystem: Bundle.main.bundleIdentifier ?? "Dayflow", + category: "Analytics" + ) + var isOptedIn: Bool { get { if UserDefaults.standard.object(forKey: optInKey) == nil { @@ -71,7 +77,7 @@ final class AnalyticsService { payload["$set_once"] = ["install_ts": iso8601Now()] UserDefaults.standard.set(true, forKey: "installTsSent") } - PostHogSDK.shared.capture("person_props_updated", properties: payload) + captureToPostHogAndLocal("person_props_updated", properties: payload) } /// Returns the stable PostHog distinct ID used as backend auth identity. @@ -152,8 +158,14 @@ final class AnalyticsService { let payload: [String: Any] = ["$set": sanitize(["analytics_opt_in": enabled])] Task.detached(priority: .utility) { - PostHogSDK.shared.capture("person_props_updated", properties: payload) - PostHogSDK.shared.capture("analytics_opt_in_changed", properties: ["enabled": enabled]) + self.captureToPostHogAndLocal( + "person_props_updated", + properties: payload + ) + self.captureToPostHogAndLocal( + "analytics_opt_in_changed", + properties: ["enabled": enabled] + ) } return } @@ -186,9 +198,7 @@ final class AnalyticsService { func capture(_ name: String, _ props: [String: Any] = [:]) { guard isOptedIn else { return } let sanitized = sanitize(props) - Task.detached(priority: .utility) { - PostHogSDK.shared.capture(name, properties: sanitized) - } + captureToPostHogAndLocal(name, properties: sanitized) } func screen(_ name: String, _ props: [String: Any] = [:]) { @@ -224,11 +234,41 @@ final class AnalyticsService { func setPersonProperties(_ props: [String: Any]) { guard isOptedIn else { return } let payload: 
[String: Any] = ["$set": sanitize(props)] + captureToPostHogAndLocal("person_props_updated", properties: payload) + } + + // MARK: - Local + PostHog logging + + private func captureToPostHogAndLocal(_ name: String, properties: [String: Any]) { Task.detached(priority: .utility) { - PostHogSDK.shared.capture("person_props_updated", properties: payload) + self.logLocal(name, properties: properties) + PostHogSDK.shared.capture(name, properties: properties) } } + private func logLocal(_ event: String, properties: [String: Any]) { + let json = jsonString(properties) + let line = truncate("[Analytics] \(event) \(json)") + print(line) + localLogger.info("\(line, privacy: .public)") + } + + private func jsonString(_ object: Any) -> String { + if JSONSerialization.isValidJSONObject(object), + let data = try? JSONSerialization.data(withJSONObject: object, options: [.sortedKeys]), + let str = String(data: data, encoding: .utf8) + { + return str + } + return String(describing: object) + } + + private func truncate(_ s: String, max: Int = 4000) -> String { + guard s.count > max else { return s } + let idx = s.index(s.startIndex, offsetBy: max) + return String(s[.. Void) { let now = Date() throttleLock.lock() diff --git a/Dayflow/Dayflow/System/LaunchAtLoginManager.swift b/Dayflow/Dayflow/System/LaunchAtLoginManager.swift index 0f1ce79f..b58b879c 100644 --- a/Dayflow/Dayflow/System/LaunchAtLoginManager.swift +++ b/Dayflow/Dayflow/System/LaunchAtLoginManager.swift @@ -27,20 +27,9 @@ final class LaunchAtLoginManager: ObservableObject { } /// Re-sync with System Settings, e.g. if the user adds/removes Dayflow manually. - /// This is the synchronous version for use in setEnabled() after user action. - func refreshStatus() { - let status = SMAppService.mainApp.status - let enabled = (status == .enabled) - if isEnabled != enabled { - logger.debug( - "Launch at login status changed → \(enabled ? 
"enabled" : "disabled") [status=\(String(describing: status))]" - ) - } - isEnabled = enabled - } - - /// Async version that runs the XPC call off the main actor to avoid blocking - private func refreshStatusAsync() async { + /// Callers should fire-and-forget via `Task { await refreshStatusAsync() }` or + /// the SwiftUI `.task {}` modifier — never call this directly from a synchronous context. + func refreshStatusAsync() async { // Run XPC call on background thread let status = await Task.detached(priority: .utility) { SMAppService.mainApp.status diff --git a/Dayflow/Dayflow/System/SilentUserDriver.swift b/Dayflow/Dayflow/System/SilentUserDriver.swift index 79856b99..7c0ef44b 100644 --- a/Dayflow/Dayflow/System/SilentUserDriver.swift +++ b/Dayflow/Dayflow/System/SilentUserDriver.swift @@ -3,21 +3,25 @@ import Sparkle // A no-UI user driver that silently installs updates immediately final class SilentUserDriver: NSObject, SPUUserDriver { + var shouldAutoUpdateAndRestart: Bool = true + func show( _ request: SPUUpdatePermissionRequest, reply: @escaping (SUUpdatePermissionResponse) -> Void ) { - print("[Sparkle] Permission request; responding with automatic checks + downloads") + print( + "[Sparkle] Permission request; responding with automatic checks + downloads (shouldAutoUpdateAndRestart=\(shouldAutoUpdateAndRestart))" + ) // Enable automatic checks & downloads by default; do not send system profile let response = SUUpdatePermissionResponse( - automaticUpdateChecks: true, - automaticUpdateDownloading: NSNumber(value: true), + automaticUpdateChecks: shouldAutoUpdateAndRestart, + automaticUpdateDownloading: NSNumber(value: shouldAutoUpdateAndRestart), sendSystemProfile: false ) AnalyticsService.shared.capture( "sparkle_permission_requested", [ - "automatic_checks": true, - "automatic_downloads": true, + "automatic_checks": shouldAutoUpdateAndRestart, + "automatic_downloads": shouldAutoUpdateAndRestart, ]) reply(response) } @@ -31,8 +35,8 @@ final class 
SilentUserDriver: NSObject, SPUUserDriver { reply: @escaping (SPUUserUpdateChoice) -> Void ) { print("[Sparkle] Update found: \(appcastItem.displayVersionString)") - // Always proceed to install - reply(.install) + // Proceed to install only if auto-update is enabled + reply(shouldAutoUpdateAndRestart ? .install : .skip) } func showUpdateReleaseNotes(with downloadData: SPUDownloadData) { @@ -79,11 +83,11 @@ final class SilentUserDriver: NSObject, SPUUserDriver { } func showReady(toInstallAndRelaunch reply: @escaping (SPUUserUpdateChoice) -> Void) { - print("[Sparkle] Ready to install; allowing termination") + print("[Sparkle] Ready to install (shouldAutoUpdateAndRestart=\(shouldAutoUpdateAndRestart))") Task { @MainActor in - AppDelegate.allowTermination = true + AppDelegate.allowTermination = shouldAutoUpdateAndRestart AnalyticsService.shared.capture("sparkle_install_ready") - reply(.install) + reply(shouldAutoUpdateAndRestart ? .install : .skip) } } diff --git a/Dayflow/Dayflow/System/UpdaterManager.swift b/Dayflow/Dayflow/System/UpdaterManager.swift index 67cd7817..ca724135 100644 --- a/Dayflow/Dayflow/System/UpdaterManager.swift +++ b/Dayflow/Dayflow/System/UpdaterManager.swift @@ -13,7 +13,7 @@ import Sparkle final class UpdaterManager: NSObject, ObservableObject { static let shared = UpdaterManager() - private let userDriver = SilentUserDriver() + let userDriver = SilentUserDriver() private lazy var updater: SPUUpdater = { SPUUpdater( hostBundle: .main, @@ -41,6 +41,10 @@ final class UpdaterManager: NSObject, ObservableObject { private override init() { super.init() + // Initialize user driver from UserDefaults + let automaticUpdatesEnabled = UserDefaults.standard.object(forKey: "automaticUpdatesEnabled") as? Bool ?? true + userDriver.shouldAutoUpdateAndRestart = automaticUpdatesEnabled + // Print what Sparkle thinks the settings are *before* starting: print("[Sparkle] bundleId=\(Bundle.main.bundleIdentifier ?? 
"nil")") print( @@ -124,7 +128,7 @@ extension UpdaterManager: SPUUpdaterDelegate { nonisolated func updaterWillRelaunchApplication(_ updater: SPUUpdater) { Task { @MainActor in print("[Sparkle] Updater will relaunch application") - AppDelegate.allowTermination = true + AppDelegate.allowTermination = false self.track("sparkle_app_relaunching") } } diff --git a/Dayflow/Dayflow/Views/Onboarding/LLMProviderSetupView.swift b/Dayflow/Dayflow/Views/Onboarding/LLMProviderSetupView.swift index b797ced2..cb46704d 100644 --- a/Dayflow/Dayflow/Views/Onboarding/LLMProviderSetupView.swift +++ b/Dayflow/Dayflow/Views/Onboarding/LLMProviderSetupView.swift @@ -137,7 +137,7 @@ final class StreamingCLI { } struct LLMProviderSetupView: View { - let providerType: String // "ollama" or "gemini" + let providerType: String // "ollama", "gemini", "chatgpt_claude", or "doubao" let onBack: () -> Void let onComplete: () -> Void @@ -149,6 +149,8 @@ struct LLMProviderSetupView: View { return "Use local AI" case "chatgpt_claude": return "Connect ChatGPT or Claude" + case "doubao": + return "Doubao (Ark)" default: return "Gemini" } @@ -458,63 +460,126 @@ struct LLMProviderSetupView: View { case .apiKeyInput: VStack(alignment: .leading, spacing: 24) { - APIKeyInputView( - apiKey: $setupState.apiKey, - title: "Enter your API key:", - subtitle: "Paste your Gemini API key below", - placeholder: "AIza...", - onValidate: { key in - // Basic validation for now - return key.hasPrefix("AIza") && key.count > 30 + if activeProviderType == "doubao" { + APIKeyInputView( + apiKey: $setupState.apiKey, + title: "Enter your API key:", + subtitle: "Paste your Volcengine Ark API key below", + placeholder: "Your Ark API key", + onValidate: { key in + key.trimmingCharacters(in: .whitespacesAndNewlines).count > 20 + } + ) + .onChange(of: setupState.apiKey) { _, _ in + setupState.clearGeminiAPIKeySaveError() } - ) - .onChange(of: setupState.apiKey) { _, _ in - setupState.clearGeminiAPIKeySaveError() - } - if let 
message = setupState.geminiAPIKeySaveError { - HStack(spacing: 8) { - Image(systemName: "exclamationmark.triangle.fill") - .font(.system(size: 12)) - .foregroundColor(Color(hex: "E91515")) + if let message = setupState.geminiAPIKeySaveError { + HStack(spacing: 8) { + Image(systemName: "exclamationmark.triangle.fill") + .font(.system(size: 12)) + .foregroundColor(Color(hex: "E91515")) - Text(message) - .font(.custom("Nunito", size: 13)) - .foregroundColor(Color(hex: "E91515")) + Text(message) + .font(.custom("Nunito", size: 13)) + .foregroundColor(Color(hex: "E91515")) + } + .padding(.horizontal, 16) + .padding(.vertical, 10) + .background( + RoundedRectangle(cornerRadius: 4) + .fill(Color(hex: "E91515").opacity(0.1)) + ) + .overlay( + RoundedRectangle(cornerRadius: 4) + .stroke(Color(hex: "E91515").opacity(0.3), lineWidth: 1) + ) } - .padding(.horizontal, 16) - .padding(.vertical, 10) - .background( - RoundedRectangle(cornerRadius: 4) - .fill(Color(hex: "E91515").opacity(0.1)) - ) - .overlay( - RoundedRectangle(cornerRadius: 4) - .stroke(Color(hex: "E91515").opacity(0.3), lineWidth: 1) - ) - } - VStack(alignment: .leading, spacing: 12) { - Text( - "Choose your Gemini model. We recommend 3.1 Flash-Lite Preview for the best speed/cost balance, with 3 Flash and 2.5 Flash available as fallbacks." 
+ VStack(alignment: .leading, spacing: 12) { + VStack(alignment: .leading, spacing: 6) { + Text("Ark base URL") + .font(.custom("Nunito", size: 13)) + .foregroundColor(.black.opacity(0.6)) + TextField("https://ark.cn-beijing.volces.com/api/v3", text: $setupState.doubaoBaseURL) + .textFieldStyle(.roundedBorder) + .font(.custom("Nunito", size: 13)) + } + + VStack(alignment: .leading, spacing: 6) { + Text("Model ID") + .font(.custom("Nunito", size: 13)) + .foregroundColor(.black.opacity(0.6)) + TextField("doubao-seed-1-6-flash-250828", text: $setupState.doubaoModelId) + .textFieldStyle(.roundedBorder) + .font(.custom("Nunito", size: 13)) + } + + Text( + "Use a vision-capable Ark model so Dayflow can read screenshots. You can change these later in Settings → AI Provider." + ) + .font(.custom("Nunito", size: 12)) + .foregroundColor(.black.opacity(0.55)) + } + } else { + APIKeyInputView( + apiKey: $setupState.apiKey, + title: "Enter your API key:", + subtitle: "Paste your Gemini API key below", + placeholder: "AIza...", + onValidate: { key in + // Basic validation for now + return key.hasPrefix("AIza") && key.count > 30 + } ) - .font(.custom("Nunito", size: 16)) - .fontWeight(.semibold) - .foregroundColor(.black.opacity(0.85)) + .onChange(of: setupState.apiKey) { _, _ in + setupState.clearGeminiAPIKeySaveError() + } - Picker("Gemini model", selection: $setupState.geminiModel) { - ForEach(GeminiModel.allCases, id: \.self) { model in - Text(model.shortLabel).tag(model) + if let message = setupState.geminiAPIKeySaveError { + HStack(spacing: 8) { + Image(systemName: "exclamationmark.triangle.fill") + .font(.system(size: 12)) + .foregroundColor(Color(hex: "E91515")) + + Text(message) + .font(.custom("Nunito", size: 13)) + .foregroundColor(Color(hex: "E91515")) } + .padding(.horizontal, 16) + .padding(.vertical, 10) + .background( + RoundedRectangle(cornerRadius: 4) + .fill(Color(hex: "E91515").opacity(0.1)) + ) + .overlay( + RoundedRectangle(cornerRadius: 4) + 
.stroke(Color(hex: "E91515").opacity(0.3), lineWidth: 1) + ) } - .pickerStyle(.segmented) - Text(GeminiModelPreference(primary: setupState.geminiModel).fallbackSummary) - .font(.custom("Nunito", size: 13)) - .foregroundColor(.black.opacity(0.55)) - } - .onChange(of: setupState.geminiModel) { - setupState.persistGeminiModelSelection(source: "onboarding_picker") + VStack(alignment: .leading, spacing: 12) { + Text( + "Choose your Gemini model. We recommend 3.1 Flash-Lite Preview for the best speed/cost balance, with 3 Flash and 2.5 Flash available as fallbacks." + ) + .font(.custom("Nunito", size: 16)) + .fontWeight(.semibold) + .foregroundColor(.black.opacity(0.85)) + + Picker("Gemini model", selection: $setupState.geminiModel) { + ForEach(GeminiModel.allCases, id: \.self) { model in + Text(model.shortLabel).tag(model) + } + } + .pickerStyle(.segmented) + + Text(GeminiModelPreference(primary: setupState.geminiModel).fallbackSummary) + .font(.custom("Nunito", size: 13)) + .foregroundColor(.black.opacity(0.55)) + } + .onChange(of: setupState.geminiModel) { + setupState.persistGeminiModelSelection(source: "onboarding_picker") + } } HStack { @@ -583,6 +648,15 @@ struct LLMProviderSetupView: View { if title == "Testing" || title == "Test Connection" { if providerType == "gemini" { TestConnectionView( + provider: .gemini, + onTestComplete: { success in + setupState.hasTestedConnection = true + setupState.testSuccessful = success + } + ) + } else if providerType == "doubao" { + TestConnectionView( + provider: .doubao, onTestComplete: { success in setupState.hasTestedConnection = true setupState.testSuccessful = success @@ -657,11 +731,12 @@ struct LLMProviderSetupView: View { case .apiKeyInstructions: VStack(alignment: .leading, spacing: 24) { - VStack(alignment: .leading, spacing: 8) { - Text("Get your Gemini API key") - .font(.custom("Nunito", size: 24)) - .fontWeight(.semibold) - .foregroundColor(.black.opacity(0.9)) + if activeProviderType == "doubao" { + 
VStack(alignment: .leading, spacing: 8) { + Text("Get your Ark API key") + .font(.custom("Nunito", size: 24)) + .fontWeight(.semibold) + .foregroundColor(.black.opacity(0.9)) Text( "allows you to run Dayflow for free. All you need is a Google account - no credit card required." @@ -669,72 +744,157 @@ struct LLMProviderSetupView: View { .font(.custom("Nunito", size: 14)) .foregroundColor(.black.opacity(0.6)) } + Text( + "Doubao runs via Volcengine Ark. You'll need an API key from your Volcengine account." + ) + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.6)) - VStack(alignment: .leading, spacing: 16) { - HStack(alignment: .top, spacing: 12) { - Text("1.") - .font(.custom("Nunito", size: 14)) - .foregroundColor(.black.opacity(0.6)) - .frame(width: 20, alignment: .leading) + VStack(alignment: .leading, spacing: 16) { + HStack(alignment: .top, spacing: 12) { + Text("1.") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.6)) + .frame(width: 20, alignment: .leading) + + Group { + Text("Open the Volcengine console ") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.8)) + + Text("(console.volcengine.com)") + .font(.custom("Nunito", size: 14)) + .foregroundColor(Color(red: 1, green: 0.42, blue: 0.02)) + .underline() + } + .onTapGesture { openVolcengineConsole() } + .pointingHandCursor() + } + + HStack(alignment: .top, spacing: 12) { + Text("2.") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.6)) + .frame(width: 20, alignment: .leading) - Group { - Text("Visit Google AI Studio ") + Text("Navigate to Ark and create an API key") .font(.custom("Nunito", size: 14)) .foregroundColor(.black.opacity(0.8)) - + Text("(aistudio.google.com)") + } + + HStack(alignment: .top, spacing: 12) { + Text("3.") .font(.custom("Nunito", size: 14)) - .foregroundColor(Color(red: 1, green: 0.42, blue: 0.02)) - .underline() + .foregroundColor(.black.opacity(0.6)) + .frame(width: 20, alignment: .leading) 
+ + Text("Copy the key and paste it on the next step") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.8)) } - .onTapGesture { openGoogleAIStudio() } - .pointingHandCursor() } + .padding(.vertical, 12) - HStack(alignment: .top, spacing: 12) { - Text("2.") - .font(.custom("Nunito", size: 14)) - .foregroundColor(.black.opacity(0.6)) - .frame(width: 20, alignment: .leading) + HStack { + DayflowSurfaceButton( + action: openVolcengineConsole, + content: { + HStack(spacing: 8) { + Image(systemName: "safari").font(.system(size: 14)) + Text("Open Volcengine Console").font(.custom("Nunito", size: 14)).fontWeight( + .semibold) + } + }, + background: Color(red: 0.25, green: 0.17, blue: 0), + foreground: .white, + borderColor: .clear, + cornerRadius: 8, + horizontalPadding: 24, + verticalPadding: 12, + showOverlayStroke: true + ) + Spacer() + nextButton + } + } else { + VStack(alignment: .leading, spacing: 8) { + Text("Get your Gemini API key") + .font(.custom("Nunito", size: 24)) + .fontWeight(.semibold) + .foregroundColor(.black.opacity(0.9)) - Text("Click \"Get API key\" in the top right") - .font(.custom("Nunito", size: 14)) - .foregroundColor(.black.opacity(0.8)) + Text( + "Google's Gemini offers a generous free tier that should allow you to run Dayflow ~15 hours a day for free - no credit card required" + ) + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.6)) } - HStack(alignment: .top, spacing: 12) { - Text("3.") - .font(.custom("Nunito", size: 14)) - .foregroundColor(.black.opacity(0.6)) - .frame(width: 20, alignment: .leading) + VStack(alignment: .leading, spacing: 16) { + HStack(alignment: .top, spacing: 12) { + Text("1.") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.6)) + .frame(width: 20, alignment: .leading) + + Group { + Text("Visit Google AI Studio ") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.8)) + + Text("(aistudio.google.com)") + .font(.custom("Nunito", 
size: 14)) + .foregroundColor(Color(red: 1, green: 0.42, blue: 0.02)) + .underline() + } + .onTapGesture { openGoogleAIStudio() } + .pointingHandCursor() + } - Text("Create a new API key and copy it") - .font(.custom("Nunito", size: 14)) - .foregroundColor(.black.opacity(0.8)) + HStack(alignment: .top, spacing: 12) { + Text("2.") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.6)) + .frame(width: 20, alignment: .leading) + + Text("Click \"Get API key\" in the top right") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.8)) + } + + HStack(alignment: .top, spacing: 12) { + Text("3.") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.6)) + .frame(width: 20, alignment: .leading) + + Text("Create a new API key and copy it") + .font(.custom("Nunito", size: 14)) + .foregroundColor(.black.opacity(0.8)) + } } - } - .padding(.vertical, 12) + .padding(.vertical, 12) - // Buttons row with Open Google AI Studio on left, Next on right - HStack { - DayflowSurfaceButton( - action: openGoogleAIStudio, - content: { - HStack(spacing: 8) { - Image(systemName: "safari").font(.system(size: 14)) - Text("Open Google AI Studio").font(.custom("Nunito", size: 14)).fontWeight( - .semibold) - } - }, - background: Color(red: 0.25, green: 0.17, blue: 0), - foreground: .white, - borderColor: .clear, - cornerRadius: 8, - horizontalPadding: 24, - verticalPadding: 12, - showOverlayStroke: true - ) - Spacer() - nextButton + // Buttons row with Open Google AI Studio on left, Next on right + HStack { + DayflowSurfaceButton( + action: openGoogleAIStudio, + content: { + HStack(spacing: 8) { + Image(systemName: "safari").font(.system(size: 14)) + Text("Open Google AI Studio").font(.custom("Nunito", size: 14)).fontWeight( + .semibold) + } + }, + background: Color(red: 0.25, green: 0.17, blue: 0), + foreground: .white, + borderColor: .clear, + cornerRadius: 8, + horizontalPadding: 24, + verticalPadding: 12, + showOverlayStroke: true + ) 
+ Spacer() + nextButton + } } } } @@ -775,6 +935,26 @@ struct LLMProviderSetupView: View { GeminiModelPreference(primary: setupState.geminiModel).save() } + if activeProviderType == "doubao" { + let trimmedKey = setupState.apiKey.trimmingCharacters(in: .whitespacesAndNewlines) + if !trimmedKey.isEmpty { + KeychainManager.shared.store(trimmedKey, for: "doubao") + } + + let baseURL = setupState.doubaoBaseURL.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedBaseURL = baseURL.isEmpty ? DoubaoPreferences.defaultBaseURL : baseURL + let modelId = setupState.doubaoModelId.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedModelId = modelId.isEmpty ? DoubaoPreferences.defaultModelId : modelId + + let type = LLMProviderType.doubaoArk(endpoint: resolvedBaseURL) + if let encoded = try? JSONEncoder().encode(type) { + UserDefaults.standard.set(encoded, forKey: "llmProviderType") + } + UserDefaults.standard.set("doubao", forKey: "selectedLLMProvider") + UserDefaults.standard.set(resolvedBaseURL, forKey: DoubaoPreferences.baseURLDefaultsKey) + UserDefaults.standard.set(resolvedModelId, forKey: DoubaoPreferences.modelIdDefaultsKey) + } + // Save local endpoint for local engine selection if activeProviderType == "ollama" { persistLocalSettings() @@ -814,6 +994,12 @@ struct LLMProviderSetupView: View { } } + private func openVolcengineConsole() { + if let url = URL(string: "https://console.volcengine.com/") { + NSWorkspace.shared.open(url) + } + } + private func openLMStudioDownload() { if let url = URL(string: "https://lmstudio.ai/") { NSWorkspace.shared.open(url) @@ -854,6 +1040,13 @@ class ProviderSetupState: ObservableObject { @Published var hasTestedConnection: Bool = false @Published var testSuccessful: Bool = false @Published var geminiModel: GeminiModel + // Doubao / Ark configuration + @Published var doubaoBaseURL: String = + (UserDefaults.standard.string(forKey: DoubaoPreferences.baseURLDefaultsKey) + ?? 
DoubaoPreferences.defaultBaseURL) + @Published var doubaoModelId: String = + (UserDefaults.standard.string(forKey: DoubaoPreferences.modelIdDefaultsKey) + ?? DoubaoPreferences.defaultModelId) // Local engine configuration @Published var localEngine: LocalEngine = .lmstudio @Published var localBaseURL: String = LocalEngine.lmstudio.defaultBaseURL @@ -872,6 +1065,7 @@ class ProviderSetupState: ObservableObject { private var lastSavedGeminiModel: GeminiModel private var hasStartedCLICheck = false + private var activeProviderId: String = "gemini" init() { let preference = GeminiModelPreference.load() self.geminiModel = preference.primary @@ -889,7 +1083,14 @@ class ProviderSetupState: ObservableObject { var canContinue: Bool { switch currentStep.contentType { case .apiKeyInput: - return !apiKey.isEmpty && apiKey.count > 20 + let trimmedKey = apiKey.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedKey.isEmpty, trimmedKey.count > 20 else { return false } + if activeProviderId == "doubao" { + let base = doubaoBaseURL.trimmingCharacters(in: .whitespacesAndNewlines) + let model = doubaoModelId.trimmingCharacters(in: .whitespacesAndNewlines) + return !base.isEmpty && !model.isEmpty + } + return true case .cliDetection: return isSelectedCLIToolReady case .information(_, _): @@ -908,6 +1109,7 @@ class ProviderSetupState: ObservableObject { } func configureSteps(for provider: String) { + activeProviderId = provider switch provider { case "ollama": steps = [ @@ -971,7 +1173,29 @@ class ProviderSetupState: ObservableObject { claudeCLIReport = nil isCheckingCLIStatus = false hasStartedCLICheck = false + case "doubao": + steps = [ + SetupStep(id: "getkey", title: "Get API key", contentType: .apiKeyInstructions), + SetupStep(id: "enterkey", title: "Enter API key", contentType: .apiKeyInput), + SetupStep( + id: "verify", + title: "Test connection", + contentType: .information( + "Test Connection", + "Click the button below to verify your API key works with Volcengine 
Ark." + ) + ), + SetupStep( + id: "complete", + title: "Complete", + contentType: .information( + "All set!", + "Doubao (Ark) is now configured and ready to use with Dayflow." + ) + ), + ] default: // gemini + activeProviderId = "gemini" steps = [ SetupStep( id: "getkey", title: "Get API key", @@ -994,7 +1218,7 @@ class ProviderSetupState: ObservableObject { func goNext() { // Save API key to keychain when moving from API key input step if currentStep.contentType.isApiKeyInput && !apiKey.isEmpty { - guard persistGeminiAPIKey(source: "onboarding_step") else { return } + guard persistActiveProviderAPIKey(source: "onboarding_step") else { return } } if currentStepIndex < steps.count - 1 { @@ -1011,7 +1235,7 @@ class ProviderSetupState: ObservableObject { func navigateToStep(_ stepId: String) { if let index = steps.firstIndex(where: { $0.id == stepId }) { if currentStep.contentType.isApiKeyInput && stepId != currentStep.id { - guard persistGeminiAPIKey(source: "onboarding_sidebar") else { return } + guard persistActiveProviderAPIKey(source: "onboarding_sidebar") else { return } } // Reset test state when navigating to test step if stepId == "verify" || stepId == "test" { @@ -1077,6 +1301,51 @@ class ProviderSetupState: ObservableObject { return stored } + @discardableResult + func persistActiveProviderAPIKey(source: String) -> Bool { + if activeProviderId == "doubao" { + return persistDoubaoAPIKey(source: source) + } + return persistGeminiAPIKey(source: source) + } + + @discardableResult + private func persistDoubaoAPIKey(source: String) -> Bool { + let trimmed = apiKey.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { + geminiAPIKeySaveError = nil + return true + } + + if trimmed != apiKey { + apiKey = trimmed + } + + let stored = KeychainManager.shared.store(trimmed, for: "doubao") + if stored { + geminiAPIKeySaveError = nil + hasTestedConnection = false + testSuccessful = false + + let base = doubaoBaseURL.trimmingCharacters(in: 
.whitespacesAndNewlines) + let resolvedBase = base.isEmpty ? DoubaoPreferences.defaultBaseURL : base + let model = doubaoModelId.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedModel = model.isEmpty ? DoubaoPreferences.defaultModelId : model + + UserDefaults.standard.set(resolvedBase, forKey: DoubaoPreferences.baseURLDefaultsKey) + UserDefaults.standard.set(resolvedModel, forKey: DoubaoPreferences.modelIdDefaultsKey) + UserDefaults.standard.set("doubao", forKey: "selectedLLMProvider") + if let encoded = try? JSONEncoder().encode(LLMProviderType.doubaoArk(endpoint: resolvedBase)) + { + UserDefaults.standard.set(encoded, forKey: "llmProviderType") + } + } else { + geminiAPIKeySaveError = + "Couldn't save your API key to Keychain. Please unlock Keychain and try again." + } + return stored + } + private var isSelectedCLIToolReady: Bool { guard let preferredCLITool else { return false } return isToolAvailable(preferredCLITool) diff --git a/Dayflow/Dayflow/Views/Onboarding/OnboardingLLMSelectionView.swift b/Dayflow/Dayflow/Views/Onboarding/OnboardingLLMSelectionView.swift index 4bc175c1..6c9d9bdb 100644 --- a/Dayflow/Dayflow/Views/Onboarding/OnboardingLLMSelectionView.swift +++ b/Dayflow/Dayflow/Views/Onboarding/OnboardingLLMSelectionView.swift @@ -35,7 +35,9 @@ struct OnboardingLLMSelectionView: View { // Card width calc (no min width, cap at 480) let availableWidth = windowWidth - (edgePadding * 2) - let rawCardWidth = (availableWidth - (cardGap * 2)) / 3 + let cardCount = max(1, providerCards.count) + let totalGaps = cardGap * CGFloat(max(0, cardCount - 1)) + let rawCardWidth = (availableWidth - totalGaps) / CGFloat(cardCount) let cardWidth = max(1, min(480, floor(rawCardWidth))) // Card height calc @@ -188,6 +190,39 @@ struct OnboardingLLMSelectionView: View { } ), + // Doubao (Ark) card + FlexibleProviderCard( + id: "doubao", + title: "Doubao (Ark)", + badgeText: "BYOK", + badgeType: .blue, + icon: "globe.asia.australia", + features: [ + ("Uses 
Volcengine Ark's OpenAI-compatible Chat Completions API", true), + ("Works with vision-capable Ark models for screen understanding", true), + ("Bring your own API key", true), + ("May require a paid Volcengine account", false), + ], + isSelected: selectedProvider == "doubao", + buttonMode: .onboarding(onProceed: { + if selectedProvider == "doubao" { + saveProviderSelection() + onNext("doubao") + } else { + withAnimation(.spring(response: 0.3, dampingFraction: 0.9)) { + didUserSelectProvider = true + selectedProvider = "doubao" + } + } + }), + onSelect: { + withAnimation(.spring(response: 0.3, dampingFraction: 0.9)) { + didUserSelectProvider = true + selectedProvider = "doubao" + } + } + ), + // ChatGPT/Claude CLI card FlexibleProviderCard( id: "chatgpt_claude", @@ -273,6 +308,8 @@ struct OnboardingLLMSelectionView: View { providerType = .dayflowBackend() case "chatgpt_claude": providerType = .chatGPTClaude + case "doubao": + providerType = .doubaoArk() default: providerType = .geminiDirect } diff --git a/Dayflow/Dayflow/Views/Onboarding/TestConnectionView.swift b/Dayflow/Dayflow/Views/Onboarding/TestConnectionView.swift index d3e4c7ab..4e7ac76b 100644 --- a/Dayflow/Dayflow/Views/Onboarding/TestConnectionView.swift +++ b/Dayflow/Dayflow/Views/Onboarding/TestConnectionView.swift @@ -2,17 +2,20 @@ // TestConnectionView.swift // Dayflow // -// Test connection button for Gemini API +// Test connection button for supported API providers // +import Foundation import SwiftUI struct TestConnectionView: View { + let provider: LLMProviderID let onTestComplete: ((Bool) -> Void)? @State private var isTesting = false @State private var testResult: TestResult? - init(onTestComplete: ((Bool) -> Void)? = nil) { + init(provider: LLMProviderID = .gemini, onTestComplete: ((Bool) -> Void)? 
= nil) { + self.provider = provider self.onTestComplete = onTestComplete } @@ -118,37 +121,106 @@ struct TestConnectionView: View { private func testConnection() { guard !isTesting else { return } - // Get API key from keychain - guard let apiKey = KeychainManager.shared.retrieve(for: "gemini") else { - testResult = .failure("No API key found. Please enter your API key first.") + let analyticsProvider = provider.analyticsName + + func finishFailure(_ message: String, errorCode: String? = nil) { + testResult = .failure(message) onTestComplete?(false) - AnalyticsService.shared.capture( - "connection_test_failed", ["provider": "gemini", "error_code": "no_api_key"]) + var props: [String: Any] = ["provider": analyticsProvider] + if let errorCode { + props["error_code"] = errorCode + } + AnalyticsService.shared.capture("connection_test_failed", props) + } + + func finishSuccess(_ message: String) { + testResult = .success(message) + isTesting = false + onTestComplete?(true) + AnalyticsService.shared.capture("connection_test_succeeded", ["provider": analyticsProvider]) + } + + // Get API key from keychain + let keychainKey: String + switch provider { + case .gemini: + keychainKey = "gemini" + case .doubao: + keychainKey = "doubao" + default: + finishFailure( + "This provider doesn't support connection tests yet.", errorCode: "unsupported_provider") + return + } + + guard let apiKey = KeychainManager.shared.retrieve(for: keychainKey), + !apiKey.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + else { + finishFailure("No API key found. 
Please enter your API key first.", errorCode: "no_api_key") return } isTesting = true testResult = nil - AnalyticsService.shared.capture("connection_test_started", ["provider": "gemini"]) + AnalyticsService.shared.capture("connection_test_started", ["provider": analyticsProvider]) Task { do { - let _ = try await GeminiAPIHelper.shared.testConnection(apiKey: apiKey) - await MainActor.run { - testResult = .success("Connection successful! Your API key is working.") - isTesting = false - onTestComplete?(true) + switch provider { + case .gemini: + let _ = try await GeminiAPIHelper.shared.testConnection(apiKey: apiKey) + await MainActor.run { + finishSuccess("Connection successful! Your API key is working.") + } + case .doubao: + let endpoint: String = { + if let data = UserDefaults.standard.data(forKey: "llmProviderType"), + let providerType = try? JSONDecoder().decode(LLMProviderType.self, from: data), + case .doubaoArk(let savedEndpoint) = providerType + { + return savedEndpoint + } + let stored = + (UserDefaults.standard.string(forKey: DoubaoPreferences.baseURLDefaultsKey) ?? "") + .trimmingCharacters(in: .whitespacesAndNewlines) + return stored.isEmpty ? DoubaoPreferences.defaultBaseURL : stored + }() + + let modelId = + (UserDefaults.standard.string(forKey: DoubaoPreferences.modelIdDefaultsKey) ?? "") + .trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedModelId = modelId.isEmpty ? DoubaoPreferences.defaultModelId : modelId + + let doubaoProvider = DoubaoArkProvider( + apiKey: apiKey, endpoint: endpoint, modelId: resolvedModelId) + let (text, _) = try await doubaoProvider.generateText( + prompt: "Reply with the single word OK.") + let normalized = text.trimmingCharacters(in: .whitespacesAndNewlines).uppercased() + if normalized.contains("OK") { + await MainActor.run { + finishSuccess("Connection successful! 
Your API key is working.") + } + } else { + await MainActor.run { + isTesting = false + finishFailure( + "Connected, but got an unexpected response: \(text)", + errorCode: "unexpected_response") + } + } + default: + await MainActor.run { + isTesting = false + finishFailure( + "This provider doesn't support connection tests yet.", + errorCode: "unsupported_provider") + } } - AnalyticsService.shared.capture("connection_test_succeeded", ["provider": "gemini"]) } catch { await MainActor.run { - testResult = .failure(error.localizedDescription) isTesting = false - onTestComplete?(false) + finishFailure(error.localizedDescription, errorCode: String((error as NSError).code)) } - AnalyticsService.shared.capture( - "connection_test_failed", - ["provider": "gemini", "error_code": String((error as NSError).code)]) } } } diff --git a/Dayflow/Dayflow/Views/UI/Settings/OtherSettingsViewModel.swift b/Dayflow/Dayflow/Views/UI/Settings/OtherSettingsViewModel.swift index 64697fe3..65962bcd 100644 --- a/Dayflow/Dayflow/Views/UI/Settings/OtherSettingsViewModel.swift +++ b/Dayflow/Dayflow/Views/UI/Settings/OtherSettingsViewModel.swift @@ -30,6 +30,14 @@ final class OtherSettingsViewModel: ObservableObject { TimelapsePreferences.saveAllTimelapsesToDisk = saveAllTimelapsesToDisk } } + @Published var automaticUpdatesEnabled: Bool { + didSet { + guard automaticUpdatesEnabled != oldValue else { return } + UserDefaults.standard.set(automaticUpdatesEnabled, forKey: "automaticUpdatesEnabled") + // Update UpdaterManager's user driver + UpdaterManager.shared.userDriver.shouldAutoUpdateAndRestart = automaticUpdatesEnabled + } + } @Published var outputLanguageOverride: String @Published var isOutputLanguageOverrideSaved: Bool = true @@ -50,6 +58,7 @@ final class OtherSettingsViewModel: ObservableObject { showTimelineAppIcons = UserDefaults.standard.object(forKey: "showTimelineAppIcons") as? Bool ?? 
true saveAllTimelapsesToDisk = TimelapsePreferences.saveAllTimelapsesToDisk + automaticUpdatesEnabled = UserDefaults.standard.object(forKey: "automaticUpdatesEnabled") as? Bool ?? true outputLanguageOverride = LLMOutputLanguagePreferences.override exportStartDate = timelineDisplayDate(from: Date()) exportEndDate = timelineDisplayDate(from: Date()) diff --git a/Dayflow/Dayflow/Views/UI/Settings/ProvidersSettingsViewModel.swift b/Dayflow/Dayflow/Views/UI/Settings/ProvidersSettingsViewModel.swift index 1e200b3e..d5616d69 100644 --- a/Dayflow/Dayflow/Views/UI/Settings/ProvidersSettingsViewModel.swift +++ b/Dayflow/Dayflow/Views/UI/Settings/ProvidersSettingsViewModel.swift @@ -260,6 +260,8 @@ final class ProvidersSettingsViewModel: ObservableObject { currentProvider = "ollama" case .chatGPTClaude: currentProvider = "chatgpt_claude" + case .doubaoArk: + currentProvider = "doubao" } hasLoadedProvider = true } @@ -473,6 +475,16 @@ final class ProvidersSettingsViewModel: ObservableObject { let preferredTool = (UserDefaults.standard.string(forKey: "chatCLIPreferredTool") ?? "") .trimmingCharacters(in: .whitespacesAndNewlines) return !preferredTool.isEmpty + case "doubao": + if UserDefaults.standard.bool(forKey: "doubaoSetupComplete") { + return true + } + let key = (KeychainManager.shared.retrieve(for: "doubao") ?? "") + .trimmingCharacters(in: .whitespacesAndNewlines) + let modelId = + (UserDefaults.standard.string(forKey: DoubaoPreferences.modelIdDefaultsKey) ?? "") + .trimmingCharacters(in: .whitespacesAndNewlines) + return !key.isEmpty && !modelId.isEmpty default: return false } @@ -524,6 +536,13 @@ final class ProvidersSettingsViewModel: ObservableObject { providerType = .dayflowBackend() case "chatgpt_claude": providerType = .chatGPTClaude + case "doubao": + let endpoint = + (UserDefaults.standard.string(forKey: DoubaoPreferences.baseURLDefaultsKey) + ?? 
DoubaoPreferences.defaultBaseURL) + .trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedEndpoint = endpoint.isEmpty ? DoubaoPreferences.defaultBaseURL : endpoint + providerType = .doubaoArk(endpoint: resolvedEndpoint) default: return } @@ -605,6 +624,17 @@ final class ProvidersSettingsViewModel: ObservableObject { } else if providerId == "chatgpt_claude" { props["chat_cli_tool"] = UserDefaults.standard.string(forKey: "chatCLIPreferredTool") ?? "unknown" + } else if providerId == "doubao" { + let baseURL = + UserDefaults.standard.string(forKey: DoubaoPreferences.baseURLDefaultsKey) ?? "unknown" + let modelId = + UserDefaults.standard.string(forKey: DoubaoPreferences.modelIdDefaultsKey) ?? "unknown" + let hasKey = !(KeychainManager.shared.retrieve(for: "doubao") ?? "").trimmingCharacters( + in: .whitespacesAndNewlines + ).isEmpty + props["base_url"] = baseURL + props["model_id"] = modelId + props["has_api_key"] = hasKey } AnalyticsService.shared.capture("provider_setup_completed", props) } @@ -642,6 +672,14 @@ final class ProvidersSettingsViewModel: ObservableObject { badgeType: .orange, icon: "gemini_asset" ), + CompactProviderInfo( + id: "doubao", + title: "Doubao (Ark)", + summary: "Volcengine Ark • OpenAI-compatible • bring your own key", + badgeText: "BYOK", + badgeType: .blue, + icon: "globe.asia.australia" + ), CompactProviderInfo( id: "chatgpt", title: "ChatGPT", @@ -679,6 +717,15 @@ final class ProvidersSettingsViewModel: ObservableObject { return "\(engineName) - \(truncatedModel)" case "gemini": return selectedGeminiModel.displayName + case "doubao": + let model = + (UserDefaults.standard.string(forKey: DoubaoPreferences.modelIdDefaultsKey) + ?? DoubaoPreferences.defaultModelId) + .trimmingCharacters(in: .whitespacesAndNewlines) + let displayModel = model.isEmpty ? DoubaoPreferences.defaultModelId : model + let truncated = + displayModel.count > 30 ? String(displayModel.prefix(27)) + "..." 
: displayModel + return "Ark - \(truncated)" case "chatgpt_claude": if providerId == "chatgpt" { return "ChatGPT – Codex CLI" @@ -710,6 +757,8 @@ final class ProvidersSettingsViewModel: ObservableObject { return "Gemini API" case "ollama": return "Local API" + case "doubao": + return "Doubao (Ark) API" case "chatgpt_claude": if let tool = preferredCLITool { return "\(tool.shortName) CLI" @@ -724,6 +773,7 @@ final class ProvidersSettingsViewModel: ObservableObject { switch id { case "ollama": return "Local" case "gemini": return "Gemini" + case "doubao": return "Doubao" case "chatgpt": return "ChatGPT" case "claude": return "Claude" case "chatgpt_claude": @@ -768,7 +818,7 @@ final class ProvidersSettingsViewModel: ObservableObject { func loadGeminiPromptOverridesIfNeeded(force: Bool = false) { if geminiPromptOverridesLoaded && !force { return } isUpdatingGeminiPromptState = true - let overrides = GeminiPromptPreferences.load() + let overrides = VideoPromptPreferences.load() let trimmedTitle = overrides.titleBlock?.trimmingCharacters(in: .whitespacesAndNewlines) let trimmedSummary = overrides.summaryBlock?.trimmingCharacters(in: .whitespacesAndNewlines) @@ -792,7 +842,7 @@ final class ProvidersSettingsViewModel: ObservableObject { } func persistGeminiPromptOverrides() { - let overrides = GeminiPromptOverrides( + let overrides = VideoPromptOverrides( titleBlock: normalizedOverride( text: geminiTitlePromptText, enabled: useCustomGeminiTitlePrompt), summaryBlock: normalizedOverride( @@ -802,9 +852,9 @@ final class ProvidersSettingsViewModel: ObservableObject { ) if overrides.isEmpty { - GeminiPromptPreferences.reset() + VideoPromptPreferences.reset() } else { - GeminiPromptPreferences.save(overrides) + VideoPromptPreferences.save(overrides) } } @@ -816,7 +866,7 @@ final class ProvidersSettingsViewModel: ObservableObject { geminiTitlePromptText = GeminiPromptDefaults.titleBlock geminiSummaryPromptText = GeminiPromptDefaults.summaryBlock geminiDetailedPromptText = 
GeminiPromptDefaults.detailedSummaryBlock - GeminiPromptPreferences.reset() + VideoPromptPreferences.reset() isUpdatingGeminiPromptState = false geminiPromptOverridesLoaded = true } diff --git a/Dayflow/Dayflow/Views/UI/Settings/SettingsOtherTabView.swift b/Dayflow/Dayflow/Views/UI/Settings/SettingsOtherTabView.swift index 6455d82e..e7bdfa3b 100644 --- a/Dayflow/Dayflow/Views/UI/Settings/SettingsOtherTabView.swift +++ b/Dayflow/Dayflow/Views/UI/Settings/SettingsOtherTabView.swift @@ -28,6 +28,18 @@ struct SettingsOtherTabView: View { .font(.custom("Nunito", size: 11.5)) .foregroundColor(.black.opacity(0.5)) + Toggle(isOn: $viewModel.automaticUpdatesEnabled) { + Text("Automatic app updates") + .font(.custom("Nunito", size: 13)) + .foregroundColor(.black.opacity(0.7)) + } + .toggleStyle(.switch) + .pointingHandCursor() + + Text("When on, Dayflow automatically checks for and installs updates in the background.") + .font(.custom("Nunito", size: 11.5)) + .foregroundColor(.black.opacity(0.5)) + Toggle(isOn: $viewModel.analyticsEnabled) { Text("Share crash reports and anonymous usage data") .font(.custom("Nunito", size: 13)) diff --git a/Dayflow/Dayflow/Views/UI/Settings/SettingsProvidersTabView.swift b/Dayflow/Dayflow/Views/UI/Settings/SettingsProvidersTabView.swift index 0d8d0096..0da530b9 100644 --- a/Dayflow/Dayflow/Views/UI/Settings/SettingsProvidersTabView.swift +++ b/Dayflow/Dayflow/Views/UI/Settings/SettingsProvidersTabView.swift @@ -92,7 +92,9 @@ struct SettingsProvidersTabView: View { switch viewModel.currentProvider { case "gemini": - TestConnectionView(onTestComplete: { _ in }) + TestConnectionView(provider: .gemini, onTestComplete: { _ in }) + case "doubao": + TestConnectionView(provider: .doubao, onTestComplete: { _ in }) case "ollama": LocalLLMTestView( baseURL: $viewModel.localBaseURL, diff --git a/Dayflow/Dayflow/Views/UI/SettingsView.swift b/Dayflow/Dayflow/Views/UI/SettingsView.swift index 450c26cd..c0d744c6 100644 --- 
a/Dayflow/Dayflow/Views/UI/SettingsView.swift +++ b/Dayflow/Dayflow/Views/UI/SettingsView.swift @@ -102,7 +102,9 @@ struct SettingsView: View { otherViewModel.refreshAnalyticsState() storageViewModel.refreshStorageIfNeeded(isStorageTab: selectedTab == .storage) AnalyticsService.shared.capture("settings_opened") - launchAtLoginManager.refreshStatus() + } + .task { + await launchAtLoginManager.refreshStatusAsync() } .onChange(of: selectedTab) { _, newValue in if newValue == .storage {