Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions scripts/builders/projects-evaluation-worker.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
DOCKERFILE="./services/docker/Dockerfile.projects_evaluation_worker"
CONTEXT="../"
REPO="sjc.ocir.io/axbydjxa5zuh/projects-evaluation-worker"
SERVICES="projects-evaluation-worker"
23 changes: 23 additions & 0 deletions scripts/services/docker/Dockerfile.projects_evaluation_worker
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
FROM node:20-alpine as builder

RUN apk add --no-cache python3 make g++

WORKDIR /usr/crowd/app
RUN npm install -g corepack@latest && corepack enable pnpm && corepack prepare pnpm@9.15.0 --activate

COPY ./pnpm-workspace.yaml ./pnpm-lock.yaml ./
RUN pnpm fetch

COPY ./services ./services
RUN pnpm i --frozen-lockfile

FROM node:20-bookworm-slim as runner

WORKDIR /usr/crowd/app
RUN npm install -g corepack@latest && corepack enable pnpm && corepack prepare pnpm@9.15.0 --activate && apt update && apt install -y ca-certificates --no-install-recommends && rm -rf /var/lib/apt/lists/*

COPY --from=builder /usr/crowd/app/node_modules ./node_modules
COPY --from=builder /usr/crowd/app/services/base.tsconfig.json ./services/base.tsconfig.json
COPY --from=builder /usr/crowd/app/services/libs ./services/libs
COPY --from=builder /usr/crowd/app/services/archetypes/ ./services/archetypes
COPY --from=builder /usr/crowd/app/services/apps/projects_evaluation_worker/ ./services/apps/projects_evaluation_worker
64 changes: 64 additions & 0 deletions scripts/services/projects-evaluation-worker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
version: '3.1'

x-env-args: &env-args
DOCKER_BUILDKIT: 1
NODE_ENV: docker
SERVICE: projects-evaluation-worker
CROWD_TEMPORAL_TASKQUEUE: projects-evaluation
SHELL: /bin/sh

services:
projects-evaluation-worker:
build:
context: ../../
dockerfile: ./scripts/services/docker/Dockerfile.projects_evaluation_worker
command: 'pnpm run start'
working_dir: /usr/crowd/app/services/apps/projects_evaluation_worker
env_file:
- ../../backend/.env.dist.local
- ../../backend/.env.dist.composed
- ../../backend/.env.override.local
- ../../backend/.env.override.composed
environment:
<<: *env-args
restart: always
networks:
- crowd-bridge

projects-evaluation-worker-dev:
build:
context: ../../
dockerfile: ./scripts/services/docker/Dockerfile.projects_evaluation_worker
command: 'pnpm run dev'
working_dir: /usr/crowd/app/services/apps/projects_evaluation_worker
env_file:
- ../../backend/.env.dist.local
- ../../backend/.env.dist.composed
- ../../backend/.env.override.local
- ../../backend/.env.override.composed
environment:
<<: *env-args
hostname: projects-evaluation-worker
networks:
- crowd-bridge
volumes:
- ../../services/libs/audit-logs/src:/usr/crowd/app/services/libs/audit-logs/src
- ../../services/libs/common/src:/usr/crowd/app/services/libs/common/src
- ../../services/libs/common_services/src:/usr/crowd/app/services/libs/common_services/src
- ../../services/libs/data-access-layer/src:/usr/crowd/app/services/libs/data-access-layer/src
- ../../services/libs/database/src:/usr/crowd/app/services/libs/database/src
- ../../services/libs/integrations/src:/usr/crowd/app/services/libs/integrations/src
- ../../services/libs/logging/src:/usr/crowd/app/services/libs/logging/src
- ../../services/libs/nango/src:/usr/crowd/app/services/libs/nango/src
- ../../services/libs/opensearch/src:/usr/crowd/app/services/libs/opensearch/src
- ../../services/libs/queue/src:/usr/crowd/app/services/libs/queue/src
- ../../services/libs/redis/src:/usr/crowd/app/services/libs/redis/src
- ../../services/libs/snowflake/src:/usr/crowd/app/services/libs/snowflake/src
- ../../services/libs/telemetry/src:/usr/crowd/app/services/libs/telemetry/src
- ../../services/libs/temporal/src:/usr/crowd/app/services/libs/temporal/src
- ../../services/libs/types/src:/usr/crowd/app/services/libs/types/src
- ../../services/apps/projects_evaluation_worker/src:/usr/crowd/app/services/apps/projects_evaluation_worker/src

networks:
crowd-bridge:
external: true
30 changes: 30 additions & 0 deletions services/apps/projects_evaluation_worker/package.json
Comment thread
github-license-compliance[bot] marked this conversation as resolved.
Fixed
Comment thread
github-license-compliance[bot] marked this conversation as resolved.
Fixed
Comment thread
github-license-compliance[bot] marked this conversation as resolved.
Fixed
Comment thread
github-license-compliance[bot] marked this conversation as resolved.
Fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"name": "@crowd/projects-evaluation-worker",
"private": true,
Comment thread
ulemons marked this conversation as resolved.
"scripts": {
"start": "CROWD_TEMPORAL_TASKQUEUE=projects-evaluation SERVICE=projects-evaluation-worker tsx src/main.ts",
"start:debug:local": "set -a && . ../../../backend/.env.dist.local && . ../../../backend/.env.override.local && set +a && CROWD_TEMPORAL_TASKQUEUE=projects-evaluation SERVICE=projects-evaluation-worker tsx --inspect=0.0.0.0:9233 src/main.ts",
"start:debug": "CROWD_TEMPORAL_TASKQUEUE=projects-evaluation SERVICE=projects-evaluation-worker LOG_LEVEL=trace tsx --inspect=0.0.0.0:9233 src/main.ts",
Comment thread
ulemons marked this conversation as resolved.
"dev:local": "nodemon --watch src --watch ../../libs --ext ts --exec pnpm run start:debug:local",
"dev": "nodemon --watch src --watch ../../libs --ext ts --exec pnpm run start:debug",
"lint": "npx eslint --ext .ts src --max-warnings=0",
"format": "npx prettier --write \"src/**/*.ts\"",
"format-check": "npx prettier --check .",
"tsc-check": "tsc --noEmit"
},
"dependencies": {
"@crowd/archetype-standard": "workspace:*",
"@crowd/archetype-worker": "workspace:*",
"@crowd/data-access-layer": "workspace:*",
"@crowd/logging": "workspace:*",
"@crowd/temporal": "workspace:*",
"@temporalio/client": "~1.11.8",
"@temporalio/workflow": "~1.11.8",
"tsx": "^4.7.1",
"typescript": "^5.6.3"
},
"devDependencies": {
"@types/node": "^20.8.2",
"nodemon": "^3.0.1"
}
}
1 change: 1 addition & 0 deletions services/apps/projects_evaluation_worker/src/activities.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './activities/activities'
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { findProjectCatalogPendingEvaluation, updateProjectCatalog } from '@crowd/data-access-layer'
import { IDbProjectCatalog } from '@crowd/data-access-layer/src/project-catalog/types'
import { pgpQx } from '@crowd/data-access-layer/src/queryExecutor'
import { getServiceLogger } from '@crowd/logging'

import { evaluateProject } from '../evaluator/evaluator'
import { svc } from '../main'

const log = getServiceLogger()

export async function fetchPendingProjects(batchSize: number): Promise<IDbProjectCatalog[]> {
const qx = pgpQx(svc.postgres.reader.connection())

const projects = await findProjectCatalogPendingEvaluation(qx, { limit: batchSize })

log.info({ count: projects.length, batchSize }, 'Fetched projects pending evaluation.')

return projects
}

export async function evaluateAndUpdateProject(project: IDbProjectCatalog): Promise<void> {
const qx = pgpQx(svc.postgres.writer.connection())
const startTime = Date.now()

log.info({ id: project.id, repoUrl: project.repoUrl }, 'Starting evaluation.')

const result = await evaluateProject({
id: project.id,
repoUrl: project.repoUrl,
repoName: project.repoName,
projectSlug: project.projectSlug,
lfCriticalityScore: project.lfCriticalityScore,
source: project.source,
})

await updateProjectCatalog(qx, project.id, {
action: result.outcome,
evaluatedAt: new Date().toISOString(),
})

const elapsedSeconds = ((Date.now() - startTime) / 1000).toFixed(1)

log.info(
{
id: project.id,
repoUrl: project.repoUrl,
outcome: result.outcome,
reason: result.reason,
elapsedSeconds,
},
'Evaluation complete.',
)
Comment thread
ulemons marked this conversation as resolved.
}
Comment thread
ulemons marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { IEvaluationInput, IEvaluationResult } from './types'

// TODO: Replace with the actual AI evaluation algorithm once the external repo is integrated.
// The algorithm is described in the technical spec and currently takes ~30-40s per project
// at ~$0.15/project. Reference: https://github.com/... (link TBD).
Comment thread
ulemons marked this conversation as resolved.
Comment thread
ulemons marked this conversation as resolved.
export async function evaluateProject(input: IEvaluationInput): Promise<IEvaluationResult> {
console.error(`evaluateProject is not implemented yet for repo: ${input.repoUrl}`)
Comment thread
ulemons marked this conversation as resolved.
return { outcome: 'unsure', reason: 'evaluator not implemented' }
Comment thread
ulemons marked this conversation as resolved.
}
Comment thread
ulemons marked this conversation as resolved.
18 changes: 18 additions & 0 deletions services/apps/projects_evaluation_worker/src/evaluator/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { ProjectCatalogAction } from '@crowd/data-access-layer/src/project-catalog/types'

export interface IEvaluationInput {
id: string
repoUrl: string
repoName: string
projectSlug: string
lfCriticalityScore: number | null
source: string | null
}

// Evaluation can only resolve to 'onboard' or 'unsure' — never back to 'evaluate' or 'auto'.
export type EvaluationOutcome = Extract<ProjectCatalogAction, 'onboard' | 'unsure'>

export interface IEvaluationResult {
outcome: EvaluationOutcome
reason: string
}
40 changes: 40 additions & 0 deletions services/apps/projects_evaluation_worker/src/main.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { Config } from '@crowd/archetype-standard'
import { Options, ServiceWorker } from '@crowd/archetype-worker'

import { scheduleProjectsEvaluation } from './schedules/scheduleProjectsEvaluation'

const config: Config = {
envvars: [],
producer: {
enabled: false,
},
temporal: {
enabled: true,
},
redis: {
enabled: false,
Comment thread
ulemons marked this conversation as resolved.
},
}

const options: Options = {
postgres: {
enabled: true,
},
opensearch: {
enabled: false,
},
}

export const svc = new ServiceWorker(config, options)

setImmediate(async () => {
await svc.init()

svc.log.info('Projects evaluation worker starting up.')

await scheduleProjectsEvaluation()

svc.log.info('Projects evaluation worker running — schedule registered, waiting for Temporal.')

await svc.start()
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { ScheduleAlreadyRunning, ScheduleOverlapPolicy } from '@temporalio/client'

import { svc } from '../main'
import { evaluateProjects } from '../workflows'

export const scheduleProjectsEvaluation = async () => {
svc.log.info('Scheduling projects evaluation')

try {
await svc.temporal.schedule.create({
scheduleId: 'projectsEvaluation',
spec: {
// Run every Monday at 02:00 UTC — gives time after the weekly discovery run.
cronExpressions: ['0 2 * * 1'],
},
policies: {
overlap: ScheduleOverlapPolicy.SKIP,
catchupWindow: '1 minute',
},
action: {
type: 'startWorkflow',
workflowType: evaluateProjects,
taskQueue: 'projects-evaluation',
args: [{ batchSize: 100 }],
Comment thread
ulemons marked this conversation as resolved.
// 100 projects × ~3min each = ~5h worst case; set ceiling with margin.
workflowExecutionTimeout: '6 hours',
Comment thread
ulemons marked this conversation as resolved.
Comment thread
ulemons marked this conversation as resolved.
retry: {
initialInterval: '30 seconds',
backoffCoefficient: 2,
maximumAttempts: 3,
},
},
})
} catch (err) {
if (err instanceof ScheduleAlreadyRunning) {
svc.log.info('Schedule already registered in Temporal.')
svc.log.info('Configuration may have changed since. Please make sure they are in sync.')
} else {
throw new Error(err)
Comment thread
ulemons marked this conversation as resolved.
Comment thread
ulemons marked this conversation as resolved.
Comment thread
ulemons marked this conversation as resolved.
Comment thread
ulemons marked this conversation as resolved.
}
}
}
3 changes: 3 additions & 0 deletions services/apps/projects_evaluation_worker/src/workflows.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import { evaluateProjects } from './workflows/evaluateProjects'

export { evaluateProjects }
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { log, proxyActivities } from '@temporalio/workflow'

import type * as activities from '../activities'

// Short timeout: just a DB read.
const fetchActivities = proxyActivities<typeof activities>({
startToCloseTimeout: '2 minutes',
retry: { maximumAttempts: 3 },
})

// Each AI evaluation call takes ~30-40s; give generous headroom per project.
const evaluateActivities = proxyActivities<typeof activities>({
startToCloseTimeout: '3 minutes',
Comment thread
ulemons marked this conversation as resolved.
retry: { maximumAttempts: 2 },
Comment thread
ulemons marked this conversation as resolved.
})

const DEFAULT_BATCH_SIZE = 100

export async function evaluateProjects(input: { batchSize?: number } = {}): Promise<void> {
const batchSize = input.batchSize ?? DEFAULT_BATCH_SIZE

log.info('evaluateProjects workflow started.')

const projects = await fetchActivities.fetchPendingProjects(batchSize)

if (projects.length === 0) {
log.info('No projects pending evaluation. Nothing to do.')
return
}

log.info(`Evaluating ${projects.length} project(s) (batch size: ${batchSize}).`)

let succeeded = 0
let failed = 0

for (let i = 0; i < projects.length; i++) {
const project = projects[i]
log.info(`[${i + 1}/${projects.length}] Evaluating: ${project.repoUrl}`)

try {
await evaluateActivities.evaluateAndUpdateProject(project)
succeeded++
} catch (err) {
// Log and continue — a single failure should not abort the whole batch.
failed++
log.error(
`Evaluation failed for project id=${project.id} repoUrl=${project.repoUrl}: ${String(err)}`,
Comment thread
ulemons marked this conversation as resolved.
)
}
}

log.info(
`Batch evaluation complete. total=${projects.length} succeeded=${succeeded} failed=${failed}`,
)
}
Loading
Loading