diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index cd83f58be1..afdd8f9d82 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1405,6 +1405,43 @@ importers: specifier: ^3.0.1 version: 3.1.0 + services/apps/projects_evaluation_worker: + dependencies: + '@crowd/archetype-standard': + specifier: workspace:* + version: link:../../archetypes/standard + '@crowd/archetype-worker': + specifier: workspace:* + version: link:../../archetypes/worker + '@crowd/data-access-layer': + specifier: workspace:* + version: link:../../libs/data-access-layer + '@crowd/logging': + specifier: workspace:* + version: link:../../libs/logging + '@crowd/temporal': + specifier: workspace:* + version: link:../../libs/temporal + '@temporalio/client': + specifier: ~1.11.8 + version: 1.11.8 + '@temporalio/workflow': + specifier: ~1.11.8 + version: 1.11.8 + tsx: + specifier: ^4.7.1 + version: 4.7.3 + typescript: + specifier: ^5.6.3 + version: 5.6.3 + devDependencies: + '@types/node': + specifier: ^20.8.2 + version: 20.12.7 + nodemon: + specifier: ^3.0.1 + version: 3.1.0 + services/apps/script_executor_worker: dependencies: '@crowd/archetype-standard': diff --git a/scripts/builders/projects-evaluation-worker.env b/scripts/builders/projects-evaluation-worker.env new file mode 100644 index 0000000000..c00ccab3d6 --- /dev/null +++ b/scripts/builders/projects-evaluation-worker.env @@ -0,0 +1,4 @@ +DOCKERFILE="./services/docker/Dockerfile.projects_evaluation_worker" +CONTEXT="../" +REPO="sjc.ocir.io/axbydjxa5zuh/projects-evaluation-worker" +SERVICES="projects-evaluation-worker" diff --git a/scripts/services/docker/Dockerfile.projects_evaluation_worker b/scripts/services/docker/Dockerfile.projects_evaluation_worker new file mode 100644 index 0000000000..33f686b6ae --- /dev/null +++ b/scripts/services/docker/Dockerfile.projects_evaluation_worker @@ -0,0 +1,23 @@ +FROM node:20-alpine as builder + +RUN apk add --no-cache python3 make g++ + +WORKDIR /usr/crowd/app +RUN npm install -g corepack@latest && corepack enable pnpm && corepack prepare pnpm@9.15.0 --activate + +COPY ./pnpm-workspace.yaml ./pnpm-lock.yaml ./ +RUN pnpm fetch + +COPY ./services ./services +RUN pnpm i --frozen-lockfile + +FROM node:20-bookworm-slim as runner + +WORKDIR /usr/crowd/app +RUN npm install -g corepack@latest && corepack enable pnpm && corepack prepare pnpm@9.15.0 --activate && apt update && apt install -y ca-certificates --no-install-recommends && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /usr/crowd/app/node_modules ./node_modules +COPY --from=builder /usr/crowd/app/services/base.tsconfig.json ./services/base.tsconfig.json +COPY --from=builder /usr/crowd/app/services/libs ./services/libs +COPY --from=builder /usr/crowd/app/services/archetypes/ ./services/archetypes +COPY --from=builder /usr/crowd/app/services/apps/projects_evaluation_worker/ ./services/apps/projects_evaluation_worker diff --git a/scripts/services/projects-evaluation-worker.yaml b/scripts/services/projects-evaluation-worker.yaml new file mode 100644 index 0000000000..f416694f32 --- /dev/null +++ b/scripts/services/projects-evaluation-worker.yaml @@ -0,0 +1,64 @@ +version: '3.1' + +x-env-args: &env-args + DOCKER_BUILDKIT: 1 + NODE_ENV: docker + SERVICE: projects-evaluation-worker + CROWD_TEMPORAL_TASKQUEUE: projects-evaluation + SHELL: /bin/sh + +services: + projects-evaluation-worker: + build: + context: ../../ + dockerfile: ./scripts/services/docker/Dockerfile.projects_evaluation_worker + command: 'pnpm run start' + working_dir: /usr/crowd/app/services/apps/projects_evaluation_worker + env_file: + - ../../backend/.env.dist.local + - ../../backend/.env.dist.composed + - ../../backend/.env.override.local + - ../../backend/.env.override.composed + environment: + <<: *env-args + restart: always + networks: + - crowd-bridge + + projects-evaluation-worker-dev: + build: + context: ../../ + dockerfile: ./scripts/services/docker/Dockerfile.projects_evaluation_worker + command: 'pnpm run dev' + working_dir: /usr/crowd/app/services/apps/projects_evaluation_worker + env_file: + - ../../backend/.env.dist.local + - ../../backend/.env.dist.composed + - ../../backend/.env.override.local + - ../../backend/.env.override.composed + environment: + <<: *env-args + hostname: projects-evaluation-worker + networks: + - crowd-bridge + volumes: + - ../../services/libs/audit-logs/src:/usr/crowd/app/services/libs/audit-logs/src + - ../../services/libs/common/src:/usr/crowd/app/services/libs/common/src + - ../../services/libs/common_services/src:/usr/crowd/app/services/libs/common_services/src + - ../../services/libs/data-access-layer/src:/usr/crowd/app/services/libs/data-access-layer/src + - ../../services/libs/database/src:/usr/crowd/app/services/libs/database/src + - ../../services/libs/integrations/src:/usr/crowd/app/services/libs/integrations/src + - ../../services/libs/logging/src:/usr/crowd/app/services/libs/logging/src + - ../../services/libs/nango/src:/usr/crowd/app/services/libs/nango/src + - ../../services/libs/opensearch/src:/usr/crowd/app/services/libs/opensearch/src + - ../../services/libs/queue/src:/usr/crowd/app/services/libs/queue/src + - ../../services/libs/redis/src:/usr/crowd/app/services/libs/redis/src + - ../../services/libs/snowflake/src:/usr/crowd/app/services/libs/snowflake/src + - ../../services/libs/telemetry/src:/usr/crowd/app/services/libs/telemetry/src + - ../../services/libs/temporal/src:/usr/crowd/app/services/libs/temporal/src + - ../../services/libs/types/src:/usr/crowd/app/services/libs/types/src + - ../../services/apps/projects_evaluation_worker/src:/usr/crowd/app/services/apps/projects_evaluation_worker/src + +networks: + crowd-bridge: + external: true diff --git a/services/apps/projects_evaluation_worker/package.json b/services/apps/projects_evaluation_worker/package.json new file mode 100644 index 0000000000..62483555f0 --- /dev/null +++ b/services/apps/projects_evaluation_worker/package.json @@ -0,0 +1,30 @@ +{ + "name": "@crowd/projects-evaluation-worker", + "private": true, + "scripts": { + "start": "CROWD_TEMPORAL_TASKQUEUE=projects-evaluation SERVICE=projects-evaluation-worker tsx src/main.ts", + "start:debug:local": "set -a && . ../../../backend/.env.dist.local && . ../../../backend/.env.override.local && set +a && CROWD_TEMPORAL_TASKQUEUE=projects-evaluation SERVICE=projects-evaluation-worker tsx --inspect=0.0.0.0:9233 src/main.ts", + "start:debug": "CROWD_TEMPORAL_TASKQUEUE=projects-evaluation SERVICE=projects-evaluation-worker LOG_LEVEL=trace tsx --inspect=0.0.0.0:9233 src/main.ts", + "dev:local": "nodemon --watch src --watch ../../libs --ext ts --exec pnpm run start:debug:local", + "dev": "nodemon --watch src --watch ../../libs --ext ts --exec pnpm run start:debug", + "lint": "npx eslint --ext .ts src --max-warnings=0", + "format": "npx prettier --write \"src/**/*.ts\"", + "format-check": "npx prettier --check .", + "tsc-check": "tsc --noEmit" + }, + "dependencies": { + "@crowd/archetype-standard": "workspace:*", + "@crowd/archetype-worker": "workspace:*", + "@crowd/data-access-layer": "workspace:*", + "@crowd/logging": "workspace:*", + "@crowd/temporal": "workspace:*", + "@temporalio/client": "~1.11.8", + "@temporalio/workflow": "~1.11.8", + "tsx": "^4.7.1", + "typescript": "^5.6.3" + }, + "devDependencies": { + "@types/node": "^20.8.2", + "nodemon": "^3.0.1" + } +} diff --git a/services/apps/projects_evaluation_worker/src/activities.ts b/services/apps/projects_evaluation_worker/src/activities.ts new file mode 100644 index 0000000000..3662234550 --- /dev/null +++ b/services/apps/projects_evaluation_worker/src/activities.ts @@ -0,0 +1 @@ +export * from './activities/activities' diff --git a/services/apps/projects_evaluation_worker/src/activities/activities.ts b/services/apps/projects_evaluation_worker/src/activities/activities.ts new file mode 100644 index 0000000000..2dcfc9ce9e --- /dev/null +++ b/services/apps/projects_evaluation_worker/src/activities/activities.ts @@ -0,0 +1,53 @@ +import { findProjectCatalogPendingEvaluation, updateProjectCatalog } from '@crowd/data-access-layer' +import { IDbProjectCatalog } from '@crowd/data-access-layer/src/project-catalog/types' +import { pgpQx } from '@crowd/data-access-layer/src/queryExecutor' +import { getServiceLogger } from '@crowd/logging' + +import { evaluateProject } from '../evaluator/evaluator' +import { svc } from '../main' + +const log = getServiceLogger() + +export async function fetchPendingProjects(batchSize: number): Promise { + const qx = pgpQx(svc.postgres.reader.connection()) + + const projects = await findProjectCatalogPendingEvaluation(qx, { limit: batchSize }) + + log.info({ count: projects.length, batchSize }, 'Fetched projects pending evaluation.') + + return projects +} + +export async function evaluateAndUpdateProject(project: IDbProjectCatalog): Promise { + const qx = pgpQx(svc.postgres.writer.connection()) + const startTime = Date.now() + + log.info({ id: project.id, repoUrl: project.repoUrl }, 'Starting evaluation.') + + const result = await evaluateProject({ + id: project.id, + repoUrl: project.repoUrl, + repoName: project.repoName, + projectSlug: project.projectSlug, + lfCriticalityScore: project.lfCriticalityScore, + source: project.source, + }) + + await updateProjectCatalog(qx, project.id, { + action: result.outcome, + evaluatedAt: new Date().toISOString(), + }) + + const elapsedSeconds = ((Date.now() - startTime) / 1000).toFixed(1) + + log.info( + { + id: project.id, + repoUrl: project.repoUrl, + outcome: result.outcome, + reason: result.reason, + elapsedSeconds, + }, + 'Evaluation complete.', + ) +} diff --git a/services/apps/projects_evaluation_worker/src/evaluator/evaluator.ts b/services/apps/projects_evaluation_worker/src/evaluator/evaluator.ts new file mode 100644 index 0000000000..d865a4be1f --- /dev/null +++ b/services/apps/projects_evaluation_worker/src/evaluator/evaluator.ts @@ -0,0 +1,9 @@ +import { IEvaluationInput, IEvaluationResult } from './types' + +// TODO: Replace with the actual AI evaluation algorithm once the external repo is integrated. +// The algorithm is described in the technical spec and currently takes ~30-40s per project +// at ~$0.15/project. Reference: https://github.com/... (link TBD). +export async function evaluateProject(input: IEvaluationInput): Promise { + console.error(`evaluateProject is not implemented yet for repo: ${input.repoUrl}`) + return { outcome: 'unsure', reason: 'evaluator not implemented' } +} diff --git a/services/apps/projects_evaluation_worker/src/evaluator/types.ts b/services/apps/projects_evaluation_worker/src/evaluator/types.ts new file mode 100644 index 0000000000..b74ff2c516 --- /dev/null +++ b/services/apps/projects_evaluation_worker/src/evaluator/types.ts @@ -0,0 +1,18 @@ +import { ProjectCatalogAction } from '@crowd/data-access-layer/src/project-catalog/types' + +export interface IEvaluationInput { + id: string + repoUrl: string + repoName: string + projectSlug: string + lfCriticalityScore: number | null + source: string | null +} + +// Evaluation can only resolve to 'onboard' or 'unsure' — never back to 'evaluate' or 'auto'. +export type EvaluationOutcome = Extract + +export interface IEvaluationResult { + outcome: EvaluationOutcome + reason: string +} diff --git a/services/apps/projects_evaluation_worker/src/main.ts b/services/apps/projects_evaluation_worker/src/main.ts new file mode 100644 index 0000000000..e76c155fab --- /dev/null +++ b/services/apps/projects_evaluation_worker/src/main.ts @@ -0,0 +1,40 @@ +import { Config } from '@crowd/archetype-standard' +import { Options, ServiceWorker } from '@crowd/archetype-worker' + +import { scheduleProjectsEvaluation } from './schedules/scheduleProjectsEvaluation' + +const config: Config = { + envvars: [], + producer: { + enabled: false, + }, + temporal: { + enabled: true, + }, + redis: { + enabled: false, + }, +} + +const options: Options = { + postgres: { + enabled: true, + }, + opensearch: { + enabled: false, + }, +} + +export const svc = new ServiceWorker(config, options) + +setImmediate(async () => { + await svc.init() + + svc.log.info('Projects evaluation worker starting up.') + + await scheduleProjectsEvaluation() + + svc.log.info('Projects evaluation worker running — schedule registered, waiting for Temporal.') + + await svc.start() +}) diff --git a/services/apps/projects_evaluation_worker/src/schedules/scheduleProjectsEvaluation.ts b/services/apps/projects_evaluation_worker/src/schedules/scheduleProjectsEvaluation.ts new file mode 100644 index 0000000000..a86b1abea3 --- /dev/null +++ b/services/apps/projects_evaluation_worker/src/schedules/scheduleProjectsEvaluation.ts @@ -0,0 +1,42 @@ +import { ScheduleAlreadyRunning, ScheduleOverlapPolicy } from '@temporalio/client' + +import { svc } from '../main' +import { evaluateProjects } from '../workflows' + +export const scheduleProjectsEvaluation = async () => { + svc.log.info('Scheduling projects evaluation') + + try { + await svc.temporal.schedule.create({ + scheduleId: 'projectsEvaluation', + spec: { + // Run every Monday at 02:00 UTC — gives time after the weekly discovery run. + cronExpressions: ['0 2 * * 1'], + }, + policies: { + overlap: ScheduleOverlapPolicy.SKIP, + catchupWindow: '1 minute', + }, + action: { + type: 'startWorkflow', + workflowType: evaluateProjects, + taskQueue: 'projects-evaluation', + args: [{ batchSize: 100 }], + // 100 projects × ~3min each = ~5h worst case; set ceiling with margin. + workflowExecutionTimeout: '6 hours', + retry: { + initialInterval: '30 seconds', + backoffCoefficient: 2, + maximumAttempts: 3, + }, + }, + }) + } catch (err) { + if (err instanceof ScheduleAlreadyRunning) { + svc.log.info('Schedule already registered in Temporal.') + svc.log.info('Configuration may have changed since. Please make sure they are in sync.') + } else { + throw new Error(err) + } + } +} diff --git a/services/apps/projects_evaluation_worker/src/workflows.ts b/services/apps/projects_evaluation_worker/src/workflows.ts new file mode 100644 index 0000000000..11a1257d16 --- /dev/null +++ b/services/apps/projects_evaluation_worker/src/workflows.ts @@ -0,0 +1,3 @@ +import { evaluateProjects } from './workflows/evaluateProjects' + +export { evaluateProjects } diff --git a/services/apps/projects_evaluation_worker/src/workflows/evaluateProjects.ts b/services/apps/projects_evaluation_worker/src/workflows/evaluateProjects.ts new file mode 100644 index 0000000000..eb3559c0eb --- /dev/null +++ b/services/apps/projects_evaluation_worker/src/workflows/evaluateProjects.ts @@ -0,0 +1,55 @@ +import { log, proxyActivities } from '@temporalio/workflow' + +import type * as activities from '../activities' + +// Short timeout: just a DB read. +const fetchActivities = proxyActivities({ + startToCloseTimeout: '2 minutes', + retry: { maximumAttempts: 3 }, +}) + +// Each AI evaluation call takes ~30-40s; give generous headroom per project. +const evaluateActivities = proxyActivities({ + startToCloseTimeout: '3 minutes', + retry: { maximumAttempts: 2 }, +}) + +const DEFAULT_BATCH_SIZE = 100 + +export async function evaluateProjects(input: { batchSize?: number } = {}): Promise { + const batchSize = input.batchSize ?? DEFAULT_BATCH_SIZE + + log.info('evaluateProjects workflow started.') + + const projects = await fetchActivities.fetchPendingProjects(batchSize) + + if (projects.length === 0) { + log.info('No projects pending evaluation. Nothing to do.') + return + } + + log.info(`Evaluating ${projects.length} project(s) (batch size: ${batchSize}).`) + + let succeeded = 0 + let failed = 0 + + for (let i = 0; i < projects.length; i++) { + const project = projects[i] + log.info(`[${i + 1}/${projects.length}] Evaluating: ${project.repoUrl}`) + + try { + await evaluateActivities.evaluateAndUpdateProject(project) + succeeded++ + } catch (err) { + // Log and continue — a single failure should not abort the whole batch. + failed++ + log.error( + `Evaluation failed for project id=${project.id} repoUrl=${project.repoUrl}: ${String(err)}`, + ) + } + } + + log.info( + `Batch evaluation complete. total=${projects.length} succeeded=${succeeded} failed=${failed}`, + ) +} diff --git a/services/apps/projects_evaluation_worker/tsconfig.json b/services/apps/projects_evaluation_worker/tsconfig.json new file mode 100644 index 0000000000..bf7f183850 --- /dev/null +++ b/services/apps/projects_evaluation_worker/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../../base.tsconfig.json", + "include": ["src/**/*"] +} diff --git a/services/libs/data-access-layer/src/project-catalog/projectCatalog.ts b/services/libs/data-access-layer/src/project-catalog/projectCatalog.ts index 925d6ebf17..87dfcd19e1 100644 --- a/services/libs/data-access-layer/src/project-catalog/projectCatalog.ts +++ b/services/libs/data-access-layer/src/project-catalog/projectCatalog.ts @@ -79,6 +79,25 @@ export async function findAllProjectCatalog( ) } +export async function findProjectCatalogPendingEvaluation( + qx: QueryExecutor, + options: { limit?: number; offset?: number } = {}, +): Promise { + const { limit, offset } = options + + return qx.select( + ` + SELECT ${prepareSelectColumns(PROJECT_CATALOG_COLUMNS)} + FROM "projectCatalog" + WHERE action = 'evaluate' + ORDER BY "lfCriticalityScore" DESC NULLS LAST, "createdAt" ASC + ${limit !== undefined ? 'LIMIT $(limit)' : ''} + ${offset !== undefined ? 'OFFSET $(offset)' : ''} + `, + { limit, offset }, + ) +} + export async function countProjectCatalog(qx: QueryExecutor): Promise { const result = await qx.selectOne( `