/**
 * Sync Service - Orchestrates synchronization of cloud provider data
 *
 * Features:
 * - Multi-provider synchronization (Linode, Vultr, AWS)
 * - Stage-based sync process with error recovery
 * - Provider status tracking and reporting
 * - Batch operations for efficiency
 *
 * @example
 * const orchestrator = new SyncOrchestrator(db, env);
 * const report = await orchestrator.syncAll(['linode']);
 */

import { LinodeConnector } from '../connectors/linode';
import { VultrConnector } from '../connectors/vultr';
import { AWSConnector } from '../connectors/aws';
import { RepositoryFactory } from '../repositories';
import { createLogger } from '../utils/logger';
import { calculateRetailHourly, calculateRetailMonthly, SUPPORTED_PROVIDERS } from '../constants';
import type {
  Env,
  ProviderSyncResult,
  SyncReport,
  RegionInput,
  InstanceTypeInput,
  PricingInput,
  GpuInstanceInput,
  GpuPricingInput,
  G8InstanceInput,
  G8PricingInput,
  VpuInstanceInput,
  VpuPricingInput,
} from '../types';
import { SyncStage } from '../types';

/**
 * Wraps a promise with a timeout
 * @param promise - The promise to wrap
 * @param ms - Timeout in milliseconds
 * @param operation - Operation name for error message
 * @returns Promise result if completed within timeout
 * @throws Error if operation times out
 */
async function withTimeout<T>(promise: Promise<T>, ms: number, operation: string): Promise<T> {
  let timeoutId: ReturnType<typeof setTimeout>;
  const timeoutPromise = new Promise<never>((_, reject) => {
    timeoutId = setTimeout(() => reject(new Error(`${operation} timed out after ${ms}ms`)), ms);
  });
  try {
    return await Promise.race([promise, timeoutPromise]);
  } finally {
    clearTimeout(timeoutId!);
  }
}
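// Illustrative usage of withTimeout (a sketch; the timeout values below mirror the
// ones used in syncProvider rather than a separate configuration):
//
//   const regions = await withTimeout(connector.getRegions(), 15000, 'linode fetch regions');
//
// If the wrapped promise settles first, the timer is cleared in the finally block;
// otherwise the race rejects with "linode fetch regions timed out after 15000ms".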
/**
 * Cloud provider connector interface for SyncOrchestrator
 *
 * This is an adapter interface used by SyncOrchestrator to abstract
 * provider-specific implementations. Actual provider connectors (LinodeConnector,
 * VultrConnector, etc.) extend CloudConnector from base.ts and are wrapped
 * by this interface in createConnector().
 */
export interface SyncConnectorAdapter {
  /** Authenticate and validate credentials */
  authenticate(): Promise<void>;

  /** Fetch all available regions (normalized) */
  getRegions(): Promise<RegionInput[]>;

  /** Fetch all instance types (normalized) */
  getInstanceTypes(): Promise<InstanceTypeInput[]>;

  /** Fetch GPU instances (optional, only for providers with GPU support) */
  getGpuInstances?(): Promise<GpuInstanceInput[]>;

  /** Fetch G8 instances (optional, only for Linode) */
  getG8Instances?(): Promise<G8InstanceInput[]>;

  /** Fetch VPU instances (optional, only for Linode) */
  getVpuInstances?(): Promise<VpuInstanceInput[]>;

  /**
   * Fetch pricing data for instances and regions
   * @param instanceTypeIds - Array of database instance type IDs
   * @param regionIds - Array of database region IDs
   * @param dbInstanceMap - Map of DB instance type ID to instance_id (API ID) for avoiding redundant queries
   * @param dbGpuMap - Map of GPU instance IDs (optional)
   * @param dbG8Map - Map of G8 instance IDs (optional)
   * @param dbVpuMap - Map of VPU instance IDs (optional)
   * @returns Array of pricing records OR number of records if batched internally
   */
  getPricing(
    instanceTypeIds: number[],
    regionIds: number[],
    dbInstanceMap: Map<number, { instance_id: string }>,
    dbGpuMap?: Map<number, { instance_id: string }>,
    dbG8Map?: Map<number, { instance_id: string }>,
    dbVpuMap?: Map<number, { instance_id: string }>
  ): Promise<PricingInput[] | number>;
}

/**
 * Sync orchestrator for managing provider synchronization
 */
export class SyncOrchestrator {
  private repos: RepositoryFactory;
  private logger: ReturnType<typeof createLogger>;

  constructor(
    db: D1Database,
    private env: Env
  ) {
    this.repos = new RepositoryFactory(db, env);
    this.logger = createLogger('[SyncOrchestrator]', env);
    this.logger.info('Initialized');
  }

  /**
   * Synchronize a single provider
   *
   * @param provider - Provider name (linode, vultr, aws)
   * @returns Sync result with statistics and error information
   */
  async syncProvider(provider: string): Promise<ProviderSyncResult> {
    const startTime = Date.now();
    let stage = SyncStage.INIT;

    this.logger.info('Starting sync for provider', { provider });

    try {
      // Stage 1: Initialize - Fetch provider record ONCE
      stage = SyncStage.INIT;
      const providerRecord = await this.repos.providers.findByName(provider);
      if (!providerRecord) {
        throw new Error(`Provider not found in database: ${provider}`);
      }

      // Update provider status to syncing
      await this.repos.providers.updateSyncStatus(provider, 'syncing');
      this.logger.info(`${provider} → ${stage}`);

      // Stage 2: Initialize connector and authenticate
      const connector = await this.createConnector(provider, providerRecord.id);
      await withTimeout(connector.authenticate(), 10000, `${provider} authentication`);
      this.logger.info(`${provider} → initialized`);

      // Stage 3: Fetch regions from provider API
      stage = SyncStage.FETCH_REGIONS;
      const regions = await withTimeout(connector.getRegions(), 15000, `${provider} fetch regions`);
      this.logger.info(`${provider} → ${stage}`, { regions: regions.length });

      // Stage 4: Fetch instance types from provider API
      stage = SyncStage.FETCH_INSTANCES;
      const instances = await withTimeout(connector.getInstanceTypes(), 30000, `${provider} fetch instances`);
      this.logger.info(`${provider} → ${stage}`, { instances: instances.length });

      // Stage 5: Normalize data (add provider_id)
      stage = SyncStage.NORMALIZE;
      const normalizedRegions = regions.map(r => ({
        ...r,
        provider_id: providerRecord.id,
      }));
      const normalizedInstances = instances.map(i => ({
        ...i,
        provider_id: providerRecord.id,
      }));
      this.logger.info(`${provider} → ${stage}`);

      // Stage 6: Persist to database
      stage = SyncStage.PERSIST;
      const regionsCount = await this.repos.regions.upsertMany(
        providerRecord.id,
        normalizedRegions
      );

      // Persist regular instances (already filtered in getInstanceTypes)
      const regularInstancesCount = await this.repos.instances.upsertMany(
        providerRecord.id,
        normalizedInstances
      );

      // Handle specialized instances separately for Linode and Vultr
      let gpuInstancesCount = 0;
      let g8InstancesCount = 0;
      let vpuInstancesCount = 0;

      if (provider.toLowerCase() === 'linode') {
        // GPU instances
        if (connector.getGpuInstances) {
          const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
          if (gpuInstances && gpuInstances.length > 0) {
            gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
              providerRecord.id,
              gpuInstances
            );
          }
        }

        // G8 instances
        if (connector.getG8Instances) {
          const g8Instances = await withTimeout(connector.getG8Instances(), 15000, `${provider} fetch G8 instances`);
          if (g8Instances && g8Instances.length > 0) {
            g8InstancesCount = await this.repos.g8Instances.upsertMany(
              providerRecord.id,
              g8Instances
            );
          }
        }

        // VPU instances
        if (connector.getVpuInstances) {
          const vpuInstances = await withTimeout(connector.getVpuInstances(), 15000, `${provider} fetch VPU instances`);
          if (vpuInstances && vpuInstances.length > 0) {
            vpuInstancesCount = await this.repos.vpuInstances.upsertMany(
              providerRecord.id,
              vpuInstances
            );
          }
        }
      }

      // Handle Vultr GPU instances
      if (provider.toLowerCase() === 'vultr') {
        if (connector.getGpuInstances) {
          const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
          if (gpuInstances && gpuInstances.length > 0) {
            gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
              providerRecord.id,
              gpuInstances
            );
          }
        }
      }

      const instancesCount = regularInstancesCount + gpuInstancesCount + g8InstancesCount + vpuInstancesCount;

      // Fetch pricing data - need instance and region IDs from DB
      // Use D1 batch to reduce query count (fetch all instance types in one batch)
      const batchQueries = [
        this.repos.db.prepare('SELECT id, region_code FROM regions WHERE provider_id = ?').bind(providerRecord.id),
        this.repos.db.prepare('SELECT id, instance_id FROM instance_types WHERE provider_id = ?').bind(providerRecord.id),
        this.repos.db.prepare('SELECT id, instance_id FROM gpu_instances WHERE provider_id = ?').bind(providerRecord.id),
        this.repos.db.prepare('SELECT id, instance_id FROM g8_instances WHERE provider_id = ?').bind(providerRecord.id),
        this.repos.db.prepare('SELECT id, instance_id FROM vpu_instances WHERE provider_id = ?').bind(providerRecord.id)
      ];
      const [dbRegionsResult, dbInstancesResult, dbGpuResult, dbG8Result, dbVpuResult] = await this.repos.db.batch(batchQueries);
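      // D1 returns batch results in the same order as the prepared statements, so the
      // destructuring above lines up as regions → instance_types → gpu_instances →
      // g8_instances → vpu_instances.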
      if (!dbRegionsResult.success || !dbInstancesResult.success) {
        throw new Error('Failed to fetch regions/instances for pricing');
      }

      // Validate and extract region IDs
      if (!Array.isArray(dbRegionsResult.results)) {
        throw new Error('Unexpected database result format for regions');
      }
      const regionIds = dbRegionsResult.results.map((r: any) => {
        if (typeof r?.id !== 'number') {
          throw new Error('Invalid region id in database result');
        }
        return r.id;
      });

      // Validate and extract instance type data
      if (!Array.isArray(dbInstancesResult.results)) {
        throw new Error('Unexpected database result format for instances');
      }
      const dbInstancesData = dbInstancesResult.results.map((i: any) => {
        if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
          throw new Error('Invalid instance data in database result');
        }
        return { id: i.id, instance_id: i.instance_id };
      });
      const instanceTypeIds = dbInstancesData.map(i => i.id);

      // Create instance mapping to avoid redundant queries in getPricing
      const dbInstanceMap = new Map<number, { instance_id: string }>(
        dbInstancesData.map(i => [i.id, { instance_id: i.instance_id }])
      );

      // Create specialized instance mappings with validation
      if (!Array.isArray(dbGpuResult.results)) {
        throw new Error('Unexpected database result format for GPU instances');
      }
      const dbGpuMap = new Map<number, { instance_id: string }>(
        dbGpuResult.results.map((i: any) => {
          if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
            throw new Error('Invalid GPU instance data in database result');
          }
          return [i.id, { instance_id: i.instance_id }];
        })
      );

      if (!Array.isArray(dbG8Result.results)) {
        throw new Error('Unexpected database result format for G8 instances');
      }
      const dbG8Map = new Map<number, { instance_id: string }>(
        dbG8Result.results.map((i: any) => {
          if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
            throw new Error('Invalid G8 instance data in database result');
          }
          return [i.id, { instance_id: i.instance_id }];
        })
      );

      if (!Array.isArray(dbVpuResult.results)) {
        throw new Error('Unexpected database result format for VPU instances');
      }
      const dbVpuMap = new Map<number, { instance_id: string }>(
        dbVpuResult.results.map((i: any) => {
          if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
            throw new Error('Invalid VPU instance data in database result');
          }
          return [i.id, { instance_id: i.instance_id }];
        })
      );

      // Get pricing data - may return array or count depending on provider
      // Pass all instance maps for specialized pricing
      const pricingResult = await withTimeout(
        connector.getPricing(
          instanceTypeIds,
          regionIds,
          dbInstanceMap,
          dbGpuMap,
          dbG8Map,
          dbVpuMap
        ),
        60000,
        `${provider} fetch pricing`
      );

      // Handle both return types: array (Linode, Vultr) or number (AWS with generator)
      let pricingCount = 0;
      if (typeof pricingResult === 'number') {
        // Provider processed batches internally, returned count
        pricingCount = pricingResult;
      } else if (pricingResult.length > 0) {
        // Provider returned pricing array, upsert it
        pricingCount = await this.repos.pricing.upsertMany(pricingResult);
      }

      this.logger.info(`${provider} → ${stage}`, {
        regions: regionsCount,
        regular_instances: regularInstancesCount,
        gpu_instances: gpuInstancesCount,
        g8_instances: g8InstancesCount,
        vpu_instances: vpuInstancesCount,
        total_instances: instancesCount,
        pricing: pricingCount
      });

      // Stage 7: Validate
      stage = SyncStage.VALIDATE;
      if (regionsCount === 0 || instancesCount === 0) {
        throw new Error('No data was synced - possible API or parsing issue');
      }
      this.logger.info(`${provider} → ${stage}`);

      // Stage 8: Sync Anvil Pricing (if applicable)
      stage = SyncStage.SYNC_ANVIL_PRICING;
      let anvilPricingCount = 0;
      try {
        anvilPricingCount = await this.syncAnvilPricing(provider);
        if (anvilPricingCount > 0) {
          this.logger.info(`${provider} → ${stage}`, { anvil_pricing: anvilPricingCount });
        }
      } catch (anvilError) {
        // Log error but don't fail the entire sync
        this.logger.error('Anvil pricing sync failed', {
          provider,
          error: anvilError instanceof Error ? anvilError.message : String(anvilError)
        });
      }

      // Stage 9: Complete - Update provider status to success
      stage = SyncStage.COMPLETE;
      await this.repos.providers.updateSyncStatus(provider, 'success');

      const duration = Date.now() - startTime;
      this.logger.info(`${provider} → ${stage}`, { duration_ms: duration });

      return {
        provider,
        success: true,
        regions_synced: regionsCount,
        instances_synced: instancesCount,
        pricing_synced: pricingCount,
        duration_ms: duration,
      };
    } catch (error) {
      const duration = Date.now() - startTime;
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      this.logger.error(`${provider} failed at ${stage}`, {
        error: error instanceof Error ? error.message : String(error),
        stage
      });

      // Update provider status to error
      try {
        await this.repos.providers.updateSyncStatus(provider, 'error', errorMessage);
      } catch (statusError) {
        this.logger.error('Failed to update provider status', {
          error: statusError instanceof Error ? statusError.message : String(statusError)
        });
      }

      return {
        provider,
        success: false,
        regions_synced: 0,
        instances_synced: 0,
        pricing_synced: 0,
        duration_ms: duration,
        error: errorMessage,
        error_details: {
          stage,
          message: errorMessage,
          // Stack trace logged server-side only, not exposed to clients
        },
      };
    }
  }

  /**
   * Synchronize all providers
   *
   * IMPORTANT: Providers are synced sequentially (not in parallel) to avoid
   * exceeding Cloudflare Workers' 30-second CPU time limit. Each provider
   * sync involves multiple API calls and database operations.
   *
   * For production deployments with large datasets, consider using
   * Cloudflare Queues to process each provider as a separate job.
   *
   * @param providers - Array of provider names to sync (defaults to all supported providers)
   * @returns Complete sync report with statistics
   */
  async syncAll(providers: string[] = [...SUPPORTED_PROVIDERS]): Promise<SyncReport> {
    const startedAt = new Date().toISOString();
    const startTime = Date.now();

    this.logger.info('Starting sequential sync for providers', { providers: providers.join(', ') });

    // Run provider syncs sequentially to avoid CPU timeout
    // Each provider sync is independent and can complete within time limits
    const providerResults: ProviderSyncResult[] = [];
    for (const provider of providers) {
      try {
        const result = await this.syncProvider(provider);
        providerResults.push(result);

        // Log progress after each provider
        this.logger.info('Provider sync completed', {
          provider,
          success: result.success,
          elapsed_ms: Date.now() - startTime
        });
      } catch (error) {
        // Handle unexpected errors
        providerResults.push({
          provider,
          success: false,
          regions_synced: 0,
          instances_synced: 0,
          pricing_synced: 0,
          duration_ms: 0,
          error: error instanceof Error ? error.message : 'Unknown error',
        });
      }
    }

    const completedAt = new Date().toISOString();
    const totalDuration = Date.now() - startTime;

    // Calculate summary
    const successful = providerResults.filter(r => r.success);
    const failed = providerResults.filter(r => !r.success);

    const summary = {
      total_providers: providers.length,
      successful_providers: successful.length,
      failed_providers: failed.length,
      total_regions: providerResults.reduce((sum, r) => sum + r.regions_synced, 0),
      total_instances: providerResults.reduce((sum, r) => sum + r.instances_synced, 0),
      total_pricing: providerResults.reduce((sum, r) => sum + r.pricing_synced, 0),
    };

    const report: SyncReport = {
      success: failed.length === 0,
      started_at: startedAt,
      completed_at: completedAt,
      total_duration_ms: totalDuration,
      providers: providerResults,
      summary,
    };

    this.logger.info('Sync complete', {
      total: summary.total_providers,
      success: summary.successful_providers,
      failed: summary.failed_providers,
      duration_ms: totalDuration,
    });

    return report;
  }
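  // Shape of the report returned by syncAll() (illustrative values only; the field
  // names mirror the SyncReport / ProviderSyncResult construction above):
  //
  //   {
  //     success: true,
  //     started_at: '2025-01-01T00:00:00.000Z',
  //     completed_at: '2025-01-01T00:02:10.000Z',
  //     total_duration_ms: 130000,
  //     providers: [{ provider: 'linode', success: true, regions_synced: 20,
  //                   instances_synced: 200, pricing_synced: 4000, duration_ms: 65000 }],
  //     summary: { total_providers: 1, successful_providers: 1, failed_providers: 0,
  //                total_regions: 20, total_instances: 200, total_pricing: 4000 },
  //   }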
  /**
   * Generate AWS pricing records in batches using Generator pattern
   * Minimizes memory usage by yielding one batch of records at a time (500 per batch)
   *
   * @param instanceTypeIds - Array of database instance type IDs
   * @param regionIds - Array of database region IDs
   * @param dbInstanceMap - Map of instance type ID to DB instance data
   * @param rawInstanceMap - Map of instance_id (API ID) to raw AWS data
   * @yields Batches of PricingInput records (500 per batch)
   *
   * Manual Test:
   * Generator yields ~51 batches for ~25,230 total records (870 instances × 29 regions)
   */
  private *generateAWSPricingBatches(
    instanceTypeIds: number[],
    regionIds: number[],
    dbInstanceMap: Map<number, { instance_id: string }>,
    rawInstanceMap: Map<string, { Cost: number; MonthlyPrice: number }>
  ): Generator<PricingInput[]> {
    const BATCH_SIZE = 500;
    let batch: PricingInput[] = [];

    for (const regionId of regionIds) {
      for (const instanceTypeId of instanceTypeIds) {
        const dbInstance = dbInstanceMap.get(instanceTypeId);
        if (!dbInstance) {
          this.logger.warn('Instance type not found', { instanceTypeId });
          continue;
        }

        const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
        if (!rawInstance) {
          this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
          continue;
        }

        batch.push({
          instance_type_id: instanceTypeId,
          region_id: regionId,
          hourly_price: rawInstance.Cost,
          monthly_price: rawInstance.MonthlyPrice,
          currency: 'USD',
          available: 1,
        });

        if (batch.length >= BATCH_SIZE) {
          yield batch;
          batch = [];
        }
      }
    }

    // Yield remaining records
    if (batch.length > 0) {
      yield batch;
    }
  }
  /**
   * Generate Linode pricing records in batches using Generator pattern
   * Minimizes memory usage by yielding one batch at a time (default: 500 records)
   *
   * @param instanceTypeIds - Array of database instance type IDs
   * @param regionIds - Array of database region IDs
   * @param dbInstanceMap - Map of instance type ID to DB instance data
   * @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
   * @param env - Environment configuration for SYNC_BATCH_SIZE
   * @yields Batches of PricingInput records (configurable batch size)
   *
   * Manual Test:
   * For typical Linode deployment (~200 instance types × 20 regions = 4,000 records):
   * - Default batch size (500): ~8 batches
   * - Memory savings: at most 500 records held in memory instead of all 4,000
   * - Verify: Check logs for "Generated and upserted pricing records for Linode"
   */
  private *generateLinodePricingBatches(
    instanceTypeIds: number[],
    regionIds: number[],
    dbInstanceMap: Map<number, { instance_id: string }>,
    rawInstanceMap: Map<string, { price: { hourly: number; monthly: number } }>,
    env?: Env
  ): Generator<PricingInput[]> {
    const BATCH_SIZE = Math.min(
      Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
      1000
    );
    let batch: PricingInput[] = [];

    for (const regionId of regionIds) {
      for (const instanceTypeId of instanceTypeIds) {
        const dbInstance = dbInstanceMap.get(instanceTypeId);
        if (!dbInstance) {
          this.logger.warn('Instance type not found', { instanceTypeId });
          continue;
        }

        const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
        if (!rawInstance) {
          this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
          continue;
        }

        batch.push({
          instance_type_id: instanceTypeId,
          region_id: regionId,
          hourly_price: rawInstance.price.hourly,
          monthly_price: rawInstance.price.monthly,
          currency: 'USD',
          available: 1,
        });

        if (batch.length >= BATCH_SIZE) {
          yield batch;
          batch = [];
        }
      }
    }

    // Yield remaining records
    if (batch.length > 0) {
      yield batch;
    }
  }

  /**
   * Generate Vultr pricing records in batches using Generator pattern
   * Minimizes memory usage by yielding one batch at a time (default: 500 records)
   *
   * @param instanceTypeIds - Array of database instance type IDs
   * @param regionIds - Array of database region IDs
   * @param dbInstanceMap - Map of instance type ID to DB instance data
   * @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
   * @param env - Environment configuration for SYNC_BATCH_SIZE
   * @yields Batches of PricingInput records (configurable batch size)
   *
   * Manual Test:
   * For typical Vultr deployment (~100 plans × 20 regions = 2,000 records):
   * - Default batch size (500): ~4 batches
   * - Memory savings: at most 500 records held in memory instead of all 2,000
   * - Verify: Check logs for "Generated and upserted pricing records for Vultr"
   */
  private *generateVultrPricingBatches(
    instanceTypeIds: number[],
    regionIds: number[],
    dbInstanceMap: Map<number, { instance_id: string }>,
    rawPlanMap: Map<string, { monthly_cost: number }>,
    env?: Env
  ): Generator<PricingInput[]> {
    const BATCH_SIZE = Math.min(
      Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
      1000
    );
    let batch: PricingInput[] = [];

    for (const regionId of regionIds) {
      for (const instanceTypeId of instanceTypeIds) {
        const dbInstance = dbInstanceMap.get(instanceTypeId);
        if (!dbInstance) {
          this.logger.warn('Instance type not found', { instanceTypeId });
          continue;
        }

        const rawPlan = rawPlanMap.get(dbInstance.instance_id);
        if (!rawPlan) {
          this.logger.warn('Raw plan data not found', { instance_id: dbInstance.instance_id });
          continue;
        }

        // Calculate hourly price: monthly_cost / 730 hours
        const hourlyPrice = rawPlan.monthly_cost / 730;

        batch.push({
          instance_type_id: instanceTypeId,
          region_id: regionId,
          hourly_price: hourlyPrice,
          monthly_price: rawPlan.monthly_cost,
          currency: 'USD',
          available: 1,
        });

        if (batch.length >= BATCH_SIZE) {
          yield batch;
          batch = [];
        }
      }
    }

    // Yield remaining records
    if (batch.length > 0) {
      yield batch;
    }
  }
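  // How the SYNC_BATCH_SIZE clamp used by these generators resolves (illustrative):
  //   SYNC_BATCH_SIZE = '50'                → 50
  //   SYNC_BATCH_SIZE = '5000'              → 1000 (clamped to the upper bound)
  //   SYNC_BATCH_SIZE unset or non-numeric  → 500 (default)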
  /**
   * Generate Linode GPU pricing records in batches using Generator pattern
   * Minimizes memory usage by yielding one batch at a time (default: 500 records)
   *
   * @param gpuInstanceTypeIds - Array of database GPU instance type IDs
   * @param regionIds - Array of database region IDs
   * @param dbGpuInstanceMap - Map of GPU instance type ID to DB instance data
   * @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
   * @param env - Environment configuration for SYNC_BATCH_SIZE
   * @yields Batches of GpuPricingInput records (configurable batch size)
   *
   * Manual Test:
   * For typical Linode GPU instances (~10 GPU types × 20 regions = 200 records):
   * - Default batch size (500): 1 batch (all 200 records fit in a single batch)
   * - Verify: Check logs for "Generated and upserted pricing records for Linode" (gpu_pricing count)
   */
  private *generateLinodeGpuPricingBatches(
    gpuInstanceTypeIds: number[],
    regionIds: number[],
    dbGpuInstanceMap: Map<number, { instance_id: string }>,
    rawInstanceMap: Map<string, { price: { hourly: number; monthly: number } }>,
    env?: Env
  ): Generator<GpuPricingInput[]> {
    const BATCH_SIZE = Math.min(
      Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
      1000
    );
    let batch: GpuPricingInput[] = [];

    for (const regionId of regionIds) {
      for (const gpuInstanceId of gpuInstanceTypeIds) {
        const dbInstance = dbGpuInstanceMap.get(gpuInstanceId);
        if (!dbInstance) {
          this.logger.warn('GPU instance type not found', { gpuInstanceId });
          continue;
        }

        const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
        if (!rawInstance) {
          this.logger.warn('Raw GPU instance data not found', { instance_id: dbInstance.instance_id });
          continue;
        }

        batch.push({
          gpu_instance_id: gpuInstanceId,
          region_id: regionId,
          hourly_price: rawInstance.price.hourly,
          monthly_price: rawInstance.price.monthly,
          currency: 'USD',
          available: 1,
        });

        if (batch.length >= BATCH_SIZE) {
          yield batch;
          batch = [];
        }
      }
    }

    // Yield remaining records
    if (batch.length > 0) {
      yield batch;
    }
  }

  /**
   * Generate Vultr GPU pricing records in batches using Generator pattern
   * Minimizes memory usage by yielding one batch at a time (default: 500 records)
   *
   * @param gpuInstanceTypeIds - Array of database GPU instance type IDs
   * @param regionIds - Array of database region IDs
   * @param dbGpuInstanceMap - Map of GPU instance type ID to DB instance data
   * @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
   * @param env - Environment configuration for SYNC_BATCH_SIZE
   * @yields Batches of GpuPricingInput records (configurable batch size)
   *
   * Manual Test:
   * For typical Vultr GPU instances (~35 vcg types × 20 regions = 700 records):
   * - Default batch size (500): ~2 batches
   * - Memory savings: at most 500 records held in memory instead of all 700
   * - Verify: Check logs for "Generated and upserted pricing records for Vultr" (gpu_pricing count)
   */
  private *generateVultrGpuPricingBatches(
    gpuInstanceTypeIds: number[],
    regionIds: number[],
    dbGpuInstanceMap: Map<number, { instance_id: string }>,
    rawPlanMap: Map<string, { monthly_cost: number }>,
    env?: Env
  ): Generator<GpuPricingInput[]> {
    const BATCH_SIZE = Math.min(
      Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
      1000
    );
    let batch: GpuPricingInput[] = [];

    for (const regionId of regionIds) {
      for (const gpuInstanceId of gpuInstanceTypeIds) {
        const dbInstance = dbGpuInstanceMap.get(gpuInstanceId);
        if (!dbInstance) {
          this.logger.warn('GPU instance type not found', { gpuInstanceId });
          continue;
        }

        const rawPlan = rawPlanMap.get(dbInstance.instance_id);
        if (!rawPlan) {
          this.logger.warn('Raw GPU plan data not found', { instance_id: dbInstance.instance_id });
          continue;
        }

        // Calculate hourly price: monthly_cost / 730 hours
        const hourlyPrice = rawPlan.monthly_cost / 730;

        batch.push({
          gpu_instance_id: gpuInstanceId,
          region_id: regionId,
          hourly_price: hourlyPrice,
          monthly_price: rawPlan.monthly_cost,
          currency: 'USD',
          available: 1,
        });

        if (batch.length >= BATCH_SIZE) {
          yield batch;
          batch = [];
        }
      }
    }

    // Yield remaining records
    if (batch.length > 0) {
      yield batch;
    }
  }
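  // Worked example for the monthly → hourly derivation used in the Vultr generators above:
  // a plan with monthly_cost = 5.00 yields 5.00 / 730 ≈ 0.00685 as hourly_price
  // (730 being the average number of hours per month used for this conversion).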
  /**
   * Generate G8 pricing records in batches for Linode
   * Similar to GPU pricing generator but for G8 instances
   */
  private *generateLinodeG8PricingBatches(
    g8InstanceTypeIds: number[],
    regionIds: number[],
    dbG8InstanceMap: Map<number, { instance_id: string }>,
    rawInstanceMap: Map<string, { price: { hourly: number; monthly: number } }>,
    env?: Env
  ): Generator<G8PricingInput[]> {
    const BATCH_SIZE = Math.min(
      Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
      1000
    );
    let batch: G8PricingInput[] = [];

    for (const regionId of regionIds) {
      for (const g8InstanceId of g8InstanceTypeIds) {
        const dbInstance = dbG8InstanceMap.get(g8InstanceId);
        if (!dbInstance) {
          this.logger.warn('G8 instance type not found', { g8InstanceId });
          continue;
        }

        const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
        if (!rawInstance) {
          this.logger.warn('Raw G8 instance data not found', { instance_id: dbInstance.instance_id });
          continue;
        }

        batch.push({
          g8_instance_id: g8InstanceId,
          region_id: regionId,
          hourly_price: rawInstance.price.hourly,
          monthly_price: rawInstance.price.monthly,
          currency: 'USD',
          available: 1,
        });

        if (batch.length >= BATCH_SIZE) {
          yield batch;
          batch = [];
        }
      }
    }

    // Yield remaining records
    if (batch.length > 0) {
      yield batch;
    }
  }

  /**
   * Generate VPU pricing records in batches for Linode
   * Similar to GPU pricing generator but for VPU instances
   */
  private *generateLinodeVpuPricingBatches(
    vpuInstanceTypeIds: number[],
    regionIds: number[],
    dbVpuInstanceMap: Map<number, { instance_id: string }>,
    rawInstanceMap: Map<string, { price: { hourly: number; monthly: number } }>,
    env?: Env
  ): Generator<VpuPricingInput[]> {
    const BATCH_SIZE = Math.min(
      Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
      1000
    );
    let batch: VpuPricingInput[] = [];

    for (const regionId of regionIds) {
      for (const vpuInstanceId of vpuInstanceTypeIds) {
        const dbInstance = dbVpuInstanceMap.get(vpuInstanceId);
        if (!dbInstance) {
          this.logger.warn('VPU instance type not found', { vpuInstanceId });
          continue;
        }

        const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
        if (!rawInstance) {
          this.logger.warn('Raw VPU instance data not found', { instance_id: dbInstance.instance_id });
          continue;
        }

        batch.push({
          vpu_instance_id: vpuInstanceId,
          region_id: regionId,
          hourly_price: rawInstance.price.hourly,
          monthly_price: rawInstance.price.monthly,
          currency: 'USD',
          available: 1,
        });

        if (batch.length >= BATCH_SIZE) {
          yield batch;
          batch = [];
        }
      }
    }

    // Yield remaining records
    if (batch.length > 0) {
      yield batch;
    }
  }

  /**
   * Synchronize Anvil pricing based on source provider pricing
   *
   * Updates anvil_pricing table with retail prices calculated from source pricing
   * Formula: retail = cost × 1.21 (10% margin + 10% VAT)
   *
   * @param provider - Source provider name (linode, vultr, aws)
   * @returns Number of anvil_pricing records updated
   */
  private async syncAnvilPricing(provider: string): Promise<number> {
    this.logger.info('Starting Anvil pricing sync', { provider });

    try {
      // Step 1: Find all anvil_regions sourced from this provider
      const anvilRegionsResult = await this.repos.db
        .prepare('SELECT id, source_region_id FROM anvil_regions WHERE source_provider = ?')
        .bind(provider)
        .all<{ id: number; source_region_id: number }>();

      if (!anvilRegionsResult.success || anvilRegionsResult.results.length === 0) {
        this.logger.info('No anvil_regions found for provider', { provider });
        return 0;
      }

      const anvilRegions = anvilRegionsResult.results;
      this.logger.info('Found anvil_regions', { provider, count: anvilRegions.length });

      // Step 2: Find all anvil_pricing records with source_instance_id
      const anvilPricingResult = await this.repos.db
        .prepare(`
          SELECT
            ap.id,
            ap.anvil_instance_id,
            ap.anvil_region_id,
            ap.source_instance_id,
            ar.source_region_id
          FROM anvil_pricing ap
          JOIN anvil_regions ar ON ap.anvil_region_id = ar.id
          WHERE ar.source_provider = ?
            AND ap.source_instance_id IS NOT NULL
        `)
        .bind(provider)
        .all<{
          id: number;
          anvil_instance_id: number;
          anvil_region_id: number;
          source_instance_id: number;
          source_region_id: number;
        }>();
      if (!anvilPricingResult.success || anvilPricingResult.results.length === 0) {
        this.logger.info('No anvil_pricing records found with source_instance_id', { provider });
        return 0;
      }

      const anvilPricingRecords = anvilPricingResult.results;
      this.logger.info('Found anvil_pricing records to update', { provider, count: anvilPricingRecords.length });

      // Step 3: Fetch source pricing data with paired conditions
      // Batch queries to avoid SQLite limits (max 100 pairs per query)
      const CHUNK_SIZE = 100;
      const allSourcePricing: Array<{
        instance_type_id: number;
        region_id: number;
        hourly_price: number;
        monthly_price: number;
      }> = [];

      for (let i = 0; i < anvilPricingRecords.length; i += CHUNK_SIZE) {
        const chunk = anvilPricingRecords.slice(i, i + CHUNK_SIZE);
        if (chunk.length === 0) continue;

        const conditions = chunk
          .map(() => '(instance_type_id = ? AND region_id = ?)')
          .join(' OR ');
        const params = chunk.flatMap(r => [r.source_instance_id, r.source_region_id]);

        const chunkResult = await this.repos.db
          .prepare(`
            SELECT instance_type_id, region_id, hourly_price, monthly_price
            FROM pricing
            WHERE ${conditions}
          `)
          .bind(...params)
          .all<{
            instance_type_id: number;
            region_id: number;
            hourly_price: number;
            monthly_price: number;
          }>();

        if (chunkResult.success && chunkResult.results) {
          allSourcePricing.push(...chunkResult.results);
        }
      }

      if (allSourcePricing.length === 0) {
        this.logger.warn('No source pricing data found', { provider });
        return 0;
      }

      // Step 4: Build lookup map: `${instance_type_id}_${region_id}` → pricing
      const sourcePricingMap = new Map<string, { hourly_price: number; monthly_price: number }>(
        allSourcePricing.map(p => [
          `${p.instance_type_id}_${p.region_id}`,
          { hourly_price: p.hourly_price, monthly_price: p.monthly_price }
        ])
      );

      // Step 5: Prepare update statements
      const updateStatements: D1PreparedStatement[] = [];
      for (const record of anvilPricingRecords) {
        const lookupKey = `${record.source_instance_id}_${record.source_region_id}`;
        const sourcePricing = sourcePricingMap.get(lookupKey);

        if (!sourcePricing) {
          this.logger.warn('Source pricing not found', {
            anvil_pricing_id: record.id,
            source_instance_id: record.source_instance_id,
            source_region_id: record.source_region_id
          });
          continue;
        }

        // Calculate retail prices: cost × 1.21
        const hourlyPrice = calculateRetailHourly(sourcePricing.hourly_price);
        const monthlyPrice = calculateRetailMonthly(sourcePricing.monthly_price);

        updateStatements.push(
          this.repos.db.prepare(`
            UPDATE anvil_pricing
            SET hourly_price = ?, monthly_price = ?
            WHERE id = ?
          `).bind(
            hourlyPrice,
            monthlyPrice,
            record.id
          )
        );
      }

      if (updateStatements.length === 0) {
        this.logger.info('No anvil_pricing records to update', { provider });
        return 0;
      }

      // Step 6: Execute batch update
      const results = await this.repos.db.batch(updateStatements);
      const successCount = results.reduce(
        (sum, result) => sum + (result.meta?.changes ?? 0),
        0
      );

      this.logger.info('Anvil pricing sync completed', {
        provider,
        updated: successCount,
        total: updateStatements.length
      });

      return successCount;
    } catch (error) {
      this.logger.error('Anvil pricing sync failed', {
        provider,
        error: error instanceof Error ? error.message : String(error)
      });
      throw error;
    }
  }
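  // Worked example for the retail formula above (cost × 1.21, i.e. 10% margin and 10% VAT
  // compounded): a source hourly_price of 0.10 becomes 0.121 and a monthly_price of 5.00
  // becomes 6.05, assuming calculateRetailHourly/calculateRetailMonthly apply that multiplier.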
  /**
   * Create connector for a specific provider
   *
   * @param provider - Provider name
   * @param providerId - Database provider ID
   * @returns Connector adapter instance for the provider
   * @throws Error if provider is not supported
   */
  private async createConnector(provider: string, providerId: number): Promise<SyncConnectorAdapter> {
    switch (provider.toLowerCase()) {
      case 'linode': {
        const connector = new LinodeConnector(this.env);

        // Cache instance types for pricing extraction
        let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;

        return {
          authenticate: () => connector.initialize(),
          getRegions: async () => {
            const regions = await connector.fetchRegions();
            return regions.map(r => connector.normalizeRegion(r, providerId));
          },
          getInstanceTypes: async () => {
            const instances = await connector.fetchInstanceTypes();
            cachedInstanceTypes = instances; // Cache for pricing

            // Classification priority:
            // 1. GPU (gpus > 0) → handled in getGpuInstances
            // 2. VPU (id contains 'netint' or 'accelerated') → handled in getVpuInstances
            // 3. G8 (id starts with 'g8-') → handled in getG8Instances
            // 4. Default → regular instance_types
            const regularInstances = instances.filter(i => {
              if (i.gpus > 0) return false;
              if (i.id.includes('netint') || i.id.includes('accelerated')) return false;
              if (i.id.startsWith('g8-')) return false;
              return true;
            });
            return regularInstances.map(i => connector.normalizeInstance(i, providerId));
          },
          getGpuInstances: async (): Promise<GpuInstanceInput[]> => {
            // Use cached instances if available to avoid redundant API calls
            if (!cachedInstanceTypes) {
              this.logger.info('Fetching instance types for GPU extraction');
              cachedInstanceTypes = await connector.fetchInstanceTypes();
            }

            // Filter and normalize GPU instances
            const gpuInstances = cachedInstanceTypes.filter(i => i.gpus > 0);
            return gpuInstances.map(i => connector.normalizeGpuInstance(i, providerId));
          },
          getG8Instances: async (): Promise<G8InstanceInput[]> => {
            // Use cached instances if available to avoid redundant API calls
            if (!cachedInstanceTypes) {
              this.logger.info('Fetching instance types for G8 extraction');
              cachedInstanceTypes = await connector.fetchInstanceTypes();
            }

            // Filter and normalize G8 instances (g8- prefix)
            const g8Instances = cachedInstanceTypes.filter(i =>
              i.id.startsWith('g8-') && (!i.gpus || i.gpus === 0)
            );
            return g8Instances.map(i => connector.normalizeG8Instance(i, providerId));
          },
          getVpuInstances: async (): Promise<VpuInstanceInput[]> => {
            // Use cached instances if available to avoid redundant API calls
            if (!cachedInstanceTypes) {
              this.logger.info('Fetching instance types for VPU extraction');
              cachedInstanceTypes = await connector.fetchInstanceTypes();
            }

            // Filter and normalize VPU instances (netint or accelerated)
            const vpuInstances = cachedInstanceTypes.filter(i =>
              (i.id.includes('netint') || i.id.includes('accelerated')) && (!i.gpus || i.gpus === 0)
            );
            return vpuInstances.map(i => connector.normalizeVpuInstance(i, providerId));
          },
          getPricing: async (
            _instanceTypeIds: number[],
            regionIds: number[],
            dbInstanceMap: Map<number, { instance_id: string }>,
            dbGpuMap?: Map<number, { instance_id: string }>,
            dbG8Map?: Map<number, { instance_id: string }>,
            dbVpuMap?: Map<number, { instance_id: string }>
          ): Promise<number> => {
            /**
             * Linode Pricing Extraction Strategy (Generator Pattern):
             *
             * Linode pricing is embedded in instance type data (price.hourly, price.monthly).
             * Generate all region × instance combinations using generator pattern.
             * GPU instances are separated and stored in gpu_pricing table.
             *
             * Expected volume: ~190 regular + ~10 GPU instances × 20 regions = ~4,000 pricing records
             * Generator pattern with batched upserts (default 500 records/batch) minimizes memory usage
             * Each batch is immediately persisted to database to avoid memory buildup
             *
             * Memory savings: at most one batch (≤500 records) held in memory at a time
             *
             * Manual Test:
             * 1. Run sync: curl -X POST http://localhost:8787/api/sync/linode
             * 2. Verify regular pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
             * 3. Verify GPU pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM gpu_pricing WHERE gpu_instance_id IN (SELECT id FROM gpu_instances WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
             * 4. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'linode') LIMIT 10"
             */

            // Re-fetch instance types if not cached
            if (!cachedInstanceTypes) {
              this.logger.info('Fetching instance types for pricing extraction');
              cachedInstanceTypes = await connector.fetchInstanceTypes();
            }

            // Create lookup map for raw instance data by instance_id (API ID)
            const rawInstanceMap = new Map(
              cachedInstanceTypes.map(i => [i.id, i])
            );

            // Use provided maps or create empty ones
            const gpuMap = dbGpuMap || new Map();
            const g8Map = dbG8Map || new Map();
            const vpuMap = dbVpuMap || new Map();

            // Separate instances by type: GPU, VPU, G8, and regular
            const gpuInstanceTypeIds: number[] = [];
            const g8InstanceTypeIds: number[] = [];
            const vpuInstanceTypeIds: number[] = [];
            const regularInstanceTypeIds: number[] = [];

            // Extract GPU instance IDs from gpuMap
            for (const dbId of gpuMap.keys()) {
              gpuInstanceTypeIds.push(dbId);
            }

            // Extract G8 instance IDs from g8Map
            for (const dbId of g8Map.keys()) {
              g8InstanceTypeIds.push(dbId);
            }

            // Extract VPU instance IDs from vpuMap
            for (const dbId of vpuMap.keys()) {
              vpuInstanceTypeIds.push(dbId);
            }

            // Regular instances from dbInstanceMap
            for (const dbId of dbInstanceMap.keys()) {
              regularInstanceTypeIds.push(dbId);
            }

            // Process regular instance pricing
            let regularPricingCount = 0;
            if (regularInstanceTypeIds.length > 0) {
              const regularGenerator = this.generateLinodePricingBatches(
                regularInstanceTypeIds,
                regionIds,
                dbInstanceMap,
                rawInstanceMap,
                this.env
              );
              for (const batch of regularGenerator) {
                const batchCount = await this.repos.pricing.upsertMany(batch);
                regularPricingCount += batchCount;
              }
            }

            // Process GPU instance pricing
            let gpuPricingCount = 0;
            if (gpuInstanceTypeIds.length > 0) {
              const gpuGenerator = this.generateLinodeGpuPricingBatches(
                gpuInstanceTypeIds,
                regionIds,
                gpuMap,
                rawInstanceMap,
                this.env
              );
              for (const batch of gpuGenerator) {
                const batchCount = await this.repos.gpuPricing.upsertMany(batch);
                gpuPricingCount += batchCount;
              }
            }

            // Process G8 instance pricing
            let g8PricingCount = 0;
            if (g8InstanceTypeIds.length > 0) {
              const g8Generator = this.generateLinodeG8PricingBatches(
                g8InstanceTypeIds,
                regionIds,
                g8Map,
                rawInstanceMap,
                this.env
              );
              for (const batch of g8Generator) {
                const batchCount = await this.repos.g8Pricing.upsertMany(batch);
                g8PricingCount += batchCount;
              }
            }
            // Process VPU instance pricing
            let vpuPricingCount = 0;
            if (vpuInstanceTypeIds.length > 0) {
              const vpuGenerator = this.generateLinodeVpuPricingBatches(
                vpuInstanceTypeIds,
                regionIds,
                vpuMap,
                rawInstanceMap,
                this.env
              );
              for (const batch of vpuGenerator) {
                const batchCount = await this.repos.vpuPricing.upsertMany(batch);
                vpuPricingCount += batchCount;
              }
            }

            const totalCount = regularPricingCount + gpuPricingCount + g8PricingCount + vpuPricingCount;

            this.logger.info('Generated and upserted pricing records for Linode', {
              regular_pricing: regularPricingCount,
              gpu_pricing: gpuPricingCount,
              g8_pricing: g8PricingCount,
              vpu_pricing: vpuPricingCount,
              total: totalCount
            });

            // Return total count of processed records
            return totalCount;
          },
        };
      }

      case 'vultr': {
        const connector = new VultrConnector(this.env);

        // Cache plans for pricing extraction
        let cachedPlans: Awaited<ReturnType<typeof connector.fetchPlans>> | null = null;

        return {
          authenticate: () => connector.initialize(),
          getRegions: async () => {
            const regions = await connector.fetchRegions();
            return regions.map(r => connector.normalizeRegion(r, providerId));
          },
          getInstanceTypes: async () => {
            const plans = await connector.fetchPlans();
            cachedPlans = plans; // Cache for pricing

            // Filter out GPU instances (vcg type)
            const regularPlans = plans.filter(p => !p.id.startsWith('vcg'));
            return regularPlans.map(p => connector.normalizeInstance(p, providerId));
          },
          getGpuInstances: async (): Promise<GpuInstanceInput[]> => {
            // Use cached plans if available to avoid redundant API calls
            if (!cachedPlans) {
              this.logger.info('Fetching plans for GPU extraction');
              cachedPlans = await connector.fetchPlans();
            }

            // Filter and normalize GPU instances (vcg type)
            const gpuPlans = cachedPlans.filter(p => p.id.startsWith('vcg'));
            return gpuPlans.map(p => connector.normalizeGpuInstance(p, providerId));
          },
          getPricing: async (
            instanceTypeIds: number[],
            regionIds: number[],
            dbInstanceMap: Map<number, { instance_id: string }>,
            dbGpuMap?: Map<number, { instance_id: string }>
          ): Promise<number> => {
            /**
             * Vultr Pricing Extraction Strategy (Generator Pattern):
             *
             * Vultr pricing is embedded in plan data (monthly_cost).
             * Generate all region × plan combinations using generator pattern.
             *
             * Expected volume: ~100 regular plans × 20 regions = ~2,000 pricing records
             *                  ~35 GPU plans × 20 regions = ~700 GPU pricing records
             * Generator pattern with batched upserts (default 500 records/batch) minimizes memory usage
             * Each batch is immediately persisted to database to avoid memory buildup
             *
             * Memory savings: at most one batch (≤500 records) held in memory at a time
             *
             * Manual Test:
             * 1. Run sync: curl -X POST http://localhost:8787/api/sync/vultr
             * 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
             * 3. Verify GPU pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM gpu_pricing WHERE gpu_instance_id IN (SELECT id FROM gpu_instances WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
             * 4. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'vultr') LIMIT 10"
             * 5. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
             */

            // Re-fetch plans if not cached
            if (!cachedPlans) {
              this.logger.info('Fetching plans for pricing extraction');
              cachedPlans = await connector.fetchPlans();
            }

            // Create lookup map for raw plan data by plan ID (API ID)
            const rawPlanMap = new Map(
              cachedPlans.map(p => [p.id, p])
            );

            // Process regular instance pricing
            let regularPricingCount = 0;
            if (instanceTypeIds.length > 0) {
              const regularGenerator = this.generateVultrPricingBatches(
                instanceTypeIds,
                regionIds,
                dbInstanceMap,
                rawPlanMap,
                this.env
              );
              for (const batch of regularGenerator) {
                const batchCount = await this.repos.pricing.upsertMany(batch);
                regularPricingCount += batchCount;
              }
            }

            // Process GPU instance pricing
            let gpuPricingCount = 0;
            const gpuMap = dbGpuMap || new Map();
            if (gpuMap.size > 0) {
              const gpuInstanceTypeIds = Array.from(gpuMap.keys());
              const gpuGenerator = this.generateVultrGpuPricingBatches(
                gpuInstanceTypeIds,
                regionIds,
                gpuMap,
                rawPlanMap,
                this.env
              );
              for (const batch of gpuGenerator) {
                const batchCount = await this.repos.gpuPricing.upsertMany(batch);
                gpuPricingCount += batchCount;
              }
            }

            const totalCount = regularPricingCount + gpuPricingCount;

            this.logger.info('Generated and upserted pricing records for Vultr', {
              regular_pricing: regularPricingCount,
              gpu_pricing: gpuPricingCount,
              total: totalCount
            });

            // Return total count of processed records
            return totalCount;
          },
        };
      }

      case 'aws': {
        const connector = new AWSConnector(this.env);

        // Cache instance types for pricing extraction
        let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;

        return {
          authenticate: () => connector.initialize(),
          getRegions: async () => {
            const regions = await connector.fetchRegions();
            return regions.map(r => connector.normalizeRegion(r, providerId));
          },
          getInstanceTypes: async () => {
            const instances = await connector.fetchInstanceTypes();
            cachedInstanceTypes = instances; // Cache for pricing
            return instances.map(i => connector.normalizeInstance(i, providerId));
          },
          getPricing: async (
            instanceTypeIds: number[],
            regionIds: number[],
            dbInstanceMap: Map<number, { instance_id: string }>
          ): Promise<number> => {
            /**
             * AWS Pricing Extraction Strategy (Generator Pattern):
             *
             * AWS pricing from ec2.shop is region-agnostic (same price globally).
             * Generate all region × instance combinations using generator pattern.
             *
             * Expected volume: ~870 instances × 29 regions = ~25,230 pricing records
             * Generator pattern with batched upserts (500 records/batch) minimizes memory usage
             * Each batch is immediately persisted to database to avoid memory buildup
             *
             * Manual Test:
             * 1. Run sync: curl -X POST http://localhost:8787/api/sync/aws
             * 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'aws'))"
             * 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'aws') LIMIT 10"
             * 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
             */

            // Re-fetch instance types if not cached
            if (!cachedInstanceTypes) {
              this.logger.info('Fetching instance types for pricing extraction');
              cachedInstanceTypes = await connector.fetchInstanceTypes();
            }

            // Create lookup map for raw instance data by instance_id (API ID)
            const rawInstanceMap = new Map(
              cachedInstanceTypes.map(i => [i.InstanceType, i])
            );

            // Use generator pattern for memory-efficient processing
            const pricingGenerator = this.generateAWSPricingBatches(
              instanceTypeIds,
              regionIds,
              dbInstanceMap,
              rawInstanceMap
            );

            // Process batches incrementally
            let totalCount = 0;
            for (const batch of pricingGenerator) {
              const batchCount = await this.repos.pricing.upsertMany(batch);
              totalCount += batchCount;
            }

            this.logger.info('Generated and upserted pricing records for AWS', { count: totalCount });

            // Return total count of processed records
            return totalCount;
          },
        };
      }

      default:
        throw new Error(`Unsupported provider: ${provider}`);
    }
  }
}
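
// Example wiring from a Cloudflare Workers scheduled (cron) handler — an illustrative
// sketch only; the D1 binding name (env.DB) is an assumption, not part of this module:
//
//   export default {
//     async scheduled(_controller: ScheduledController, env: Env, ctx: ExecutionContext) {
//       const orchestrator = new SyncOrchestrator(env.DB, env);
//       // Keep the Worker alive until the full sync report has been produced.
//       ctx.waitUntil(orchestrator.syncAll());
//     },
//   };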