cloud-server/src/services/sync.ts
kappa 3a8dd705e6 refactor: comprehensive code review fixes (security, performance, QA)
## Security Improvements
- Fix timing attack in verifyApiKey with fixed 256-byte buffer (see sketch after this list)
- Fix sortOrder SQL injection with whitelist validation
- Fix rate limiting bypass for non-Cloudflare traffic (fail-closed)
- Remove stack trace exposure in error responses
- Add request_id for audit trail (X-Request-ID header)
- Sanitize origin header to prevent log injection
- Add content-length validation for /sync endpoint (10KB limit)
- Replace Math.random() with crypto.randomUUID() for sync IDs
- Expand sensitive data masking patterns (8 → 18)
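
A minimal sketch of the constant-time check, assuming both the presented key and the stored key are folded into fixed 256-byte buffers before comparison (the buffer size comes from the note above; the helper name and padding details are illustrative, not the actual `verifyApiKey`):

```ts
// Illustrative only: compare API keys in constant time over fixed-size buffers,
// so the comparison cost does not depend on how long a matching prefix is.
function timingSafeEqualFixed(provided: string, expected: string): boolean {
  const SIZE = 256; // fixed buffer size, per the commit note
  const a = new Uint8Array(SIZE);
  const b = new Uint8Array(SIZE);
  new TextEncoder().encodeInto(provided, a); // writes at most SIZE bytes
  new TextEncoder().encodeInto(expected, b);

  // Fold in a length mismatch, then XOR every byte without early exit.
  let diff = provided.length ^ expected.length;
  for (let i = 0; i < SIZE; i++) {
    diff |= a[i] ^ b[i];
  }
  return diff === 0;
}
```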

## Performance Improvements
- Reduce rate limiter KV reads from 3 to 1 per request (66% reduction)
- Increase sync batch size from 100 to 500 (80% fewer batches)
- Fix health check N+1 query with efficient JOINs
- Fix COUNT(*) Cartesian product with COUNT(DISTINCT) (see sketch after this list)
- Implement shared logger cache pattern across repositories
- Add CacheService singleton pattern in recommend.ts
- Add composite index for recommendation queries
- Implement Anvil pricing query batching (100 per chunk)
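
A hedged sketch of the COUNT(*) fix: over a one-to-many JOIN, `COUNT(*)` counts every joined pair, while `COUNT(DISTINCT ...)` counts each parent row once. The table and column names match the schema used in sync.ts; the exact shape of the health-check query is an assumption:

```ts
// Illustrative only: the real health-check query is not shown here.
// Before: COUNT(*) over a one-to-many JOIN counts every (instance × pricing) pair.
const inflatedCount = `
  SELECT COUNT(*) AS n
  FROM instance_types it
  JOIN pricing p ON p.instance_type_id = it.id
  WHERE it.provider_id = ?`;

// After: COUNT(DISTINCT it.id) counts each instance once, however many
// pricing rows join onto it.
const distinctCount = `
  SELECT COUNT(DISTINCT it.id) AS n
  FROM instance_types it
  JOIN pricing p ON p.instance_type_id = it.id
  WHERE it.provider_id = ?`;
```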

## QA Improvements
- Add BATCH_SIZE bounds validation (1-1000) (see sketch after this list)
- Add pagination bounds (page >= 1, MAX_OFFSET = 100000)
- Add min/max range consistency validation
- Add DB reference validation for singleton services
- Add type guards for database result validation
- Add timeout mechanism for external API calls (10-60s)
- Use SUPPORTED_PROVIDERS constant instead of hardcoded list
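
A minimal sketch of the bounds validation, assuming a small clamping helper (the helper name is illustrative; the sync service applies the same `Math.min(Math.max(...))` pattern inline for `SYNC_BATCH_SIZE`):

```ts
// Illustrative helper: parse a value and clamp it to [min, max],
// falling back to a default when it is missing or not a number.
function clampInt(raw: string | undefined, fallback: number, min: number, max: number): number {
  const parsed = parseInt(raw ?? '', 10);
  return Math.min(Math.max(Number.isNaN(parsed) ? fallback : parsed, min), max);
}

// BATCH_SIZE bounds (1-1000), mirroring the pricing generators in sync.ts:
const batchSize = clampInt(undefined /* env?.SYNC_BATCH_SIZE */, 500, 1, 1000);

// Pagination bounds: page >= 1 and offset capped at MAX_OFFSET = 100000.
const MAX_OFFSET = 100000;
const page = clampInt('3' /* stand-in for a query param */, 1, 1, Number.MAX_SAFE_INTEGER);
const offset = Math.min((page - 1) * 50, MAX_OFFSET); // 50 is an assumed page size
```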

## Removed
- Remove Vault integration (using Wrangler secrets)
- Remove 6-hour pricing cron (daily sync only)

## Configuration
- Add idx_instance_types_specs_filter composite index
- Add CORS Access-Control-Expose-Headers (see sketch after this list)
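
A hedged sketch of the CORS change, assuming the exposed header is the `X-Request-ID` added for the audit trail (the exact header list and origin policy are assumptions):

```ts
// Illustrative only: let browser clients read the request id from responses.
function withCorsHeaders(response: Response, origin: string): Response {
  const headers = new Headers(response.headers);
  headers.set('Access-Control-Allow-Origin', origin);
  headers.set('Access-Control-Expose-Headers', 'X-Request-ID');
  return new Response(response.body, { status: response.status, headers });
}
```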

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 23:50:37 +09:00

/**
* Sync Service - Orchestrates synchronization of cloud provider data
*
* Features:
* - Multi-provider synchronization (Linode, Vultr, AWS)
* - Stage-based sync process with error recovery
* - Provider status tracking and reporting
* - Batch operations for efficiency
*
* @example
* const orchestrator = new SyncOrchestrator(db, env);
* const report = await orchestrator.syncAll(['linode']);
*/
import { LinodeConnector } from '../connectors/linode';
import { VultrConnector } from '../connectors/vultr';
import { AWSConnector } from '../connectors/aws';
import { RepositoryFactory } from '../repositories';
import { createLogger } from '../utils/logger';
import { calculateRetailHourly, calculateRetailMonthly, SUPPORTED_PROVIDERS } from '../constants';
import type {
Env,
ProviderSyncResult,
SyncReport,
RegionInput,
InstanceTypeInput,
PricingInput,
GpuInstanceInput,
GpuPricingInput,
G8InstanceInput,
G8PricingInput,
VpuInstanceInput,
VpuPricingInput,
} from '../types';
import { SyncStage } from '../types';
/**
* Wraps a promise with a timeout
* @param promise - The promise to wrap
* @param ms - Timeout in milliseconds
* @param operation - Operation name for error message
* @returns Promise result if completed within timeout
* @throws Error if operation times out
*/
async function withTimeout<T>(promise: Promise<T>, ms: number, operation: string): Promise<T> {
let timeoutId: ReturnType<typeof setTimeout>;
const timeoutPromise = new Promise<never>((_, reject) => {
timeoutId = setTimeout(() => reject(new Error(`${operation} timed out after ${ms}ms`)), ms);
});
try {
return await Promise.race([promise, timeoutPromise]);
} finally {
clearTimeout(timeoutId!);
}
}
/**
* Cloud provider connector interface for SyncOrchestrator
*
* This is an adapter interface used by SyncOrchestrator to abstract
* provider-specific implementations. Actual provider connectors (LinodeConnector,
* VultrConnector, etc.) extend CloudConnector from base.ts and are wrapped
* by this interface in createConnector().
*/
export interface SyncConnectorAdapter {
/** Authenticate and validate credentials */
authenticate(): Promise<void>;
/** Fetch all available regions (normalized) */
getRegions(): Promise<RegionInput[]>;
/** Fetch all instance types (normalized) */
getInstanceTypes(): Promise<InstanceTypeInput[]>;
/** Fetch GPU instances (optional, only for providers with GPU support) */
getGpuInstances?(): Promise<GpuInstanceInput[]>;
/** Fetch G8 instances (optional, only for Linode) */
getG8Instances?(): Promise<G8InstanceInput[]>;
/** Fetch VPU instances (optional, only for Linode) */
getVpuInstances?(): Promise<VpuInstanceInput[]>;
/**
* Fetch pricing data for instances and regions
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of DB instance type ID to instance_id (API ID) for avoiding redundant queries
* @param dbGpuMap - Map of GPU instance IDs (optional)
* @param dbG8Map - Map of G8 instance IDs (optional)
* @param dbVpuMap - Map of VPU instance IDs (optional)
* @returns Array of pricing records OR number of records if batched internally
*/
getPricing(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
dbGpuMap?: Map<number, { instance_id: string }>,
dbG8Map?: Map<number, { instance_id: string }>,
dbVpuMap?: Map<number, { instance_id: string }>
): Promise<PricingInput[] | number>;
}
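/*
 * Example (illustrative, not used by the service): a minimal in-memory stub
 * satisfying SyncConnectorAdapter, e.g. for unit tests. Field values and the
 * partial casts are placeholders for whatever RegionInput / InstanceTypeInput /
 * PricingInput actually require. Returning an array from getPricing means
 * SyncOrchestrator upserts the records itself; returning a number instead
 * signals the connector already persisted its batches and only reports a count.
 *
 * const stubAdapter: SyncConnectorAdapter = {
 *   authenticate: async () => {},
 *   getRegions: async () => [{ region_code: 'stub-1' } as RegionInput],
 *   getInstanceTypes: async () => [{ instance_id: 'stub-small' } as InstanceTypeInput],
 *   getPricing: async (instanceTypeIds, regionIds) =>
 *     instanceTypeIds.flatMap(instance_type_id =>
 *       regionIds.map(region_id => ({
 *         instance_type_id,
 *         region_id,
 *         hourly_price: 0.01,
 *         monthly_price: 7.3,
 *         currency: 'USD',
 *         available: 1,
 *       } as PricingInput))
 *     ),
 * };
 */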
/**
* Sync orchestrator for managing provider synchronization
*/
export class SyncOrchestrator {
private repos: RepositoryFactory;
private logger: ReturnType<typeof createLogger>;
constructor(
db: D1Database,
private env: Env
) {
this.repos = new RepositoryFactory(db, env);
this.logger = createLogger('[SyncOrchestrator]', env);
this.logger.info('Initialized');
}
/**
* Synchronize a single provider
*
* @param provider - Provider name (linode, vultr, aws)
* @returns Sync result with statistics and error information
*/
async syncProvider(provider: string): Promise<ProviderSyncResult> {
const startTime = Date.now();
let stage = SyncStage.INIT;
this.logger.info('Starting sync for provider', { provider });
try {
// Stage 1: Initialize - Fetch provider record ONCE
stage = SyncStage.INIT;
const providerRecord = await this.repos.providers.findByName(provider);
if (!providerRecord) {
throw new Error(`Provider not found in database: ${provider}`);
}
// Update provider status to syncing
await this.repos.providers.updateSyncStatus(provider, 'syncing');
this.logger.info(`${provider}${stage}`);
// Stage 2: Initialize connector and authenticate
const connector = await this.createConnector(provider, providerRecord.id);
await withTimeout(connector.authenticate(), 10000, `${provider} authentication`);
this.logger.info(`${provider} → initialized`);
// Stage 3: Fetch regions from provider API
stage = SyncStage.FETCH_REGIONS;
const regions = await withTimeout(connector.getRegions(), 15000, `${provider} fetch regions`);
this.logger.info(`${provider}${stage}`, { regions: regions.length });
// Stage 4: Fetch instance types from provider API
stage = SyncStage.FETCH_INSTANCES;
const instances = await withTimeout(connector.getInstanceTypes(), 30000, `${provider} fetch instances`);
this.logger.info(`${provider}${stage}`, { instances: instances.length });
// Stage 5: Normalize data (add provider_id)
stage = SyncStage.NORMALIZE;
const normalizedRegions = regions.map(r => ({
...r,
provider_id: providerRecord.id,
}));
const normalizedInstances = instances.map(i => ({
...i,
provider_id: providerRecord.id,
}));
this.logger.info(`${provider}${stage}`);
// Stage 6: Persist to database
stage = SyncStage.PERSIST;
const regionsCount = await this.repos.regions.upsertMany(
providerRecord.id,
normalizedRegions
);
// Persist regular instances (already filtered in getInstanceTypes)
const regularInstancesCount = await this.repos.instances.upsertMany(
providerRecord.id,
normalizedInstances
);
// Handle specialized instances separately for Linode and Vultr
let gpuInstancesCount = 0;
let g8InstancesCount = 0;
let vpuInstancesCount = 0;
if (provider.toLowerCase() === 'linode') {
// GPU instances
if (connector.getGpuInstances) {
const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
if (gpuInstances && gpuInstances.length > 0) {
gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
providerRecord.id,
gpuInstances
);
}
}
// G8 instances
if (connector.getG8Instances) {
const g8Instances = await withTimeout(connector.getG8Instances(), 15000, `${provider} fetch G8 instances`);
if (g8Instances && g8Instances.length > 0) {
g8InstancesCount = await this.repos.g8Instances.upsertMany(
providerRecord.id,
g8Instances
);
}
}
// VPU instances
if (connector.getVpuInstances) {
const vpuInstances = await withTimeout(connector.getVpuInstances(), 15000, `${provider} fetch VPU instances`);
if (vpuInstances && vpuInstances.length > 0) {
vpuInstancesCount = await this.repos.vpuInstances.upsertMany(
providerRecord.id,
vpuInstances
);
}
}
}
// Handle Vultr GPU instances
if (provider.toLowerCase() === 'vultr') {
if (connector.getGpuInstances) {
const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
if (gpuInstances && gpuInstances.length > 0) {
gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
providerRecord.id,
gpuInstances
);
}
}
}
const instancesCount = regularInstancesCount + gpuInstancesCount + g8InstancesCount + vpuInstancesCount;
// Fetch pricing data - need instance and region IDs from DB
// Use D1 batch to reduce query count (fetch all instance types in one batch)
const batchQueries = [
this.repos.db.prepare('SELECT id, region_code FROM regions WHERE provider_id = ?').bind(providerRecord.id),
this.repos.db.prepare('SELECT id, instance_id FROM instance_types WHERE provider_id = ?').bind(providerRecord.id),
this.repos.db.prepare('SELECT id, instance_id FROM gpu_instances WHERE provider_id = ?').bind(providerRecord.id),
this.repos.db.prepare('SELECT id, instance_id FROM g8_instances WHERE provider_id = ?').bind(providerRecord.id),
this.repos.db.prepare('SELECT id, instance_id FROM vpu_instances WHERE provider_id = ?').bind(providerRecord.id)
];
const [dbRegionsResult, dbInstancesResult, dbGpuResult, dbG8Result, dbVpuResult] = await this.repos.db.batch(batchQueries);
if (!dbRegionsResult.success || !dbInstancesResult.success) {
throw new Error('Failed to fetch regions/instances for pricing');
}
// Validate and extract region IDs
if (!Array.isArray(dbRegionsResult.results)) {
throw new Error('Unexpected database result format for regions');
}
const regionIds = dbRegionsResult.results.map((r: any) => {
if (typeof r?.id !== 'number') {
throw new Error('Invalid region id in database result');
}
return r.id;
});
// Validate and extract instance type data
if (!Array.isArray(dbInstancesResult.results)) {
throw new Error('Unexpected database result format for instances');
}
const dbInstancesData = dbInstancesResult.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid instance data in database result');
}
return { id: i.id, instance_id: i.instance_id };
});
const instanceTypeIds = dbInstancesData.map(i => i.id);
// Create instance mapping to avoid redundant queries in getPricing
const dbInstanceMap = new Map(
dbInstancesData.map(i => [i.id, { instance_id: i.instance_id }])
);
// Create specialized instance mappings with validation
if (!Array.isArray(dbGpuResult.results)) {
throw new Error('Unexpected database result format for GPU instances');
}
const dbGpuMap = new Map(
dbGpuResult.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid GPU instance data in database result');
}
return [i.id, { instance_id: i.instance_id }];
})
);
if (!Array.isArray(dbG8Result.results)) {
throw new Error('Unexpected database result format for G8 instances');
}
const dbG8Map = new Map(
dbG8Result.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid G8 instance data in database result');
}
return [i.id, { instance_id: i.instance_id }];
})
);
if (!Array.isArray(dbVpuResult.results)) {
throw new Error('Unexpected database result format for VPU instances');
}
const dbVpuMap = new Map(
dbVpuResult.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid VPU instance data in database result');
}
return [i.id, { instance_id: i.instance_id }];
})
);
// Get pricing data - may return array or count depending on provider
// Pass all instance maps for specialized pricing
const pricingResult = await withTimeout(
connector.getPricing(
instanceTypeIds,
regionIds,
dbInstanceMap,
dbGpuMap,
dbG8Map,
dbVpuMap
),
60000,
`${provider} fetch pricing`
);
// Handle both return types: a pricing array (upserted here) or a count when the connector batches and upserts internally (the current Linode, Vultr, and AWS adapters all return a count)
let pricingCount = 0;
if (typeof pricingResult === 'number') {
// Provider processed batches internally, returned count
pricingCount = pricingResult;
} else if (pricingResult.length > 0) {
// Provider returned pricing array, upsert it
pricingCount = await this.repos.pricing.upsertMany(pricingResult);
}
this.logger.info(`${provider}${stage}`, {
regions: regionsCount,
regular_instances: regularInstancesCount,
gpu_instances: gpuInstancesCount,
g8_instances: g8InstancesCount,
vpu_instances: vpuInstancesCount,
total_instances: instancesCount,
pricing: pricingCount
});
// Stage 7: Validate
stage = SyncStage.VALIDATE;
if (regionsCount === 0 || instancesCount === 0) {
throw new Error('No data was synced - possible API or parsing issue');
}
this.logger.info(`${provider}${stage}`);
// Stage 8: Sync Anvil Pricing (if applicable)
stage = SyncStage.SYNC_ANVIL_PRICING;
let anvilPricingCount = 0;
try {
anvilPricingCount = await this.syncAnvilPricing(provider);
if (anvilPricingCount > 0) {
this.logger.info(`${provider}${stage}`, { anvil_pricing: anvilPricingCount });
}
} catch (anvilError) {
// Log error but don't fail the entire sync
this.logger.error('Anvil pricing sync failed', {
provider,
error: anvilError instanceof Error ? anvilError.message : String(anvilError)
});
}
// Stage 9: Complete - Update provider status to success
stage = SyncStage.COMPLETE;
await this.repos.providers.updateSyncStatus(provider, 'success');
const duration = Date.now() - startTime;
this.logger.info(`${provider}${stage}`, { duration_ms: duration });
return {
provider,
success: true,
regions_synced: regionsCount,
instances_synced: instancesCount,
pricing_synced: pricingCount,
duration_ms: duration,
};
} catch (error) {
const duration = Date.now() - startTime;
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
this.logger.error(`${provider} failed at ${stage}`, { error: error instanceof Error ? error.message : String(error), stage });
// Update provider status to error
try {
await this.repos.providers.updateSyncStatus(provider, 'error', errorMessage);
} catch (statusError) {
this.logger.error('Failed to update provider status', { error: statusError instanceof Error ? statusError.message : String(statusError) });
}
return {
provider,
success: false,
regions_synced: 0,
instances_synced: 0,
pricing_synced: 0,
duration_ms: duration,
error: errorMessage,
error_details: {
stage,
message: errorMessage,
// Stack trace logged server-side only, not exposed to clients
},
};
}
}
/**
* Synchronize all providers
*
* IMPORTANT: Providers are synced sequentially (not in parallel) to avoid
* exceeding Cloudflare Workers' 30-second CPU time limit. Each provider
* sync involves multiple API calls and database operations.
*
* For production deployments with large datasets, consider using
* Cloudflare Queues to process each provider as a separate job.
*
* @param providers - Array of provider names to sync (defaults to all supported providers)
* @returns Complete sync report with statistics
*/
async syncAll(providers: string[] = [...SUPPORTED_PROVIDERS]): Promise<SyncReport> {
const startedAt = new Date().toISOString();
const startTime = Date.now();
this.logger.info('Starting sequential sync for providers', { providers: providers.join(', ') });
// Run provider syncs sequentially to avoid CPU timeout
// Each provider sync is independent and can complete within time limits
const providerResults: ProviderSyncResult[] = [];
for (const provider of providers) {
try {
const result = await this.syncProvider(provider);
providerResults.push(result);
// Log progress after each provider
this.logger.info('Provider sync completed', {
provider,
success: result.success,
elapsed_ms: Date.now() - startTime
});
} catch (error) {
// Handle unexpected errors
providerResults.push({
provider,
success: false,
regions_synced: 0,
instances_synced: 0,
pricing_synced: 0,
duration_ms: 0,
error: error instanceof Error ? error.message : 'Unknown error',
});
}
}
const completedAt = new Date().toISOString();
const totalDuration = Date.now() - startTime;
// Calculate summary
const successful = providerResults.filter(r => r.success);
const failed = providerResults.filter(r => !r.success);
const summary = {
total_providers: providers.length,
successful_providers: successful.length,
failed_providers: failed.length,
total_regions: providerResults.reduce((sum, r) => sum + r.regions_synced, 0),
total_instances: providerResults.reduce((sum, r) => sum + r.instances_synced, 0),
total_pricing: providerResults.reduce((sum, r) => sum + r.pricing_synced, 0),
};
const report: SyncReport = {
success: failed.length === 0,
started_at: startedAt,
completed_at: completedAt,
total_duration_ms: totalDuration,
providers: providerResults,
summary,
};
this.logger.info('Sync complete', {
total: summary.total_providers,
success: summary.successful_providers,
failed: summary.failed_providers,
duration_ms: totalDuration,
});
return report;
}
/**
* Generate AWS pricing records in batches using Generator pattern
* Minimizes memory usage by yielding batches of 500 records at a time
*
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of instance type ID to DB instance data
* @param rawInstanceMap - Map of instance_id (API ID) to raw AWS data
* @yields Batches of PricingInput records (500 per batch)
*
* Manual Test:
* Generator yields ~51 batches for ~25,230 total records (870 instances × 29 regions)
*/
private *generateAWSPricingBatches(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { Cost: number; MonthlyPrice: number }>
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = 500;
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
for (const instanceTypeId of instanceTypeIds) {
const dbInstance = dbInstanceMap.get(instanceTypeId);
if (!dbInstance) {
this.logger.warn('Instance type not found', { instanceTypeId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
instance_type_id: instanceTypeId,
region_id: regionId,
hourly_price: rawInstance.Cost,
monthly_price: rawInstance.MonthlyPrice,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Linode pricing records in batches using Generator pattern
* Minimizes memory usage by yielding one batch at a time (default batch size: 500)
*
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of instance type ID to DB instance data
* @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of PricingInput records (configurable batch size)
*
* Manual Test:
* For typical Linode deployment (~200 instance types × 20 regions = 4,000 records):
* - Default batch size (500): ~8 batches
* - Memory footprint: at most 500 records in memory at a time (~87% less than holding all 4,000)
* - Verify: Check logs for "Generated and upserted pricing records for Linode"
*/
private *generateLinodePricingBatches(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
for (const instanceTypeId of instanceTypeIds) {
const dbInstance = dbInstanceMap.get(instanceTypeId);
if (!dbInstance) {
this.logger.warn('Instance type not found', { instanceTypeId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
instance_type_id: instanceTypeId,
region_id: regionId,
hourly_price: rawInstance.price.hourly,
monthly_price: rawInstance.price.monthly,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Vultr pricing records in batches using Generator pattern
* Minimizes memory usage by yielding one batch at a time (default batch size: 500)
*
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of instance type ID to DB instance data
* @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of PricingInput records (configurable batch size)
*
* Manual Test:
* For typical Vultr deployment (~100 plans × 20 regions = 2,000 records):
* - Default batch size (500): ~4 batches
* - Memory footprint: at most 500 records in memory at a time (~75% less than holding all 2,000)
* - Verify: Check logs for "Generated and upserted pricing records for Vultr"
*/
private *generateVultrPricingBatches(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
env?: Env
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
for (const instanceTypeId of instanceTypeIds) {
const dbInstance = dbInstanceMap.get(instanceTypeId);
if (!dbInstance) {
this.logger.warn('Instance type not found', { instanceTypeId });
continue;
}
const rawPlan = rawPlanMap.get(dbInstance.instance_id);
if (!rawPlan) {
this.logger.warn('Raw plan data not found', { instance_id: dbInstance.instance_id });
continue;
}
// Calculate hourly price: monthly_cost / 730 hours
const hourlyPrice = rawPlan.monthly_cost / 730;
batch.push({
instance_type_id: instanceTypeId,
region_id: regionId,
hourly_price: hourlyPrice,
monthly_price: rawPlan.monthly_cost,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Linode GPU pricing records in batches using Generator pattern
* Minimizes memory usage by yielding one batch at a time (default batch size: 500)
*
* @param gpuInstanceTypeIds - Array of database GPU instance type IDs
* @param regionIds - Array of database region IDs
* @param dbGpuInstanceMap - Map of GPU instance type ID to DB instance data
* @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of GpuPricingInput records (configurable batch size)
*
* Manual Test:
* For typical Linode GPU instances (~10 GPU types × 20 regions = 200 records):
* - Default batch size (500): 1 batch (all 200 records fit in a single batch)
* - Verify: Check logs for "Generated and upserted GPU pricing records for Linode"
*/
private *generateLinodeGpuPricingBatches(
gpuInstanceTypeIds: number[],
regionIds: number[],
dbGpuInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<GpuPricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: GpuPricingInput[] = [];
for (const regionId of regionIds) {
for (const gpuInstanceId of gpuInstanceTypeIds) {
const dbInstance = dbGpuInstanceMap.get(gpuInstanceId);
if (!dbInstance) {
this.logger.warn('GPU instance type not found', { gpuInstanceId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw GPU instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
gpu_instance_id: gpuInstanceId,
region_id: regionId,
hourly_price: rawInstance.price.hourly,
monthly_price: rawInstance.price.monthly,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Vultr GPU pricing records in batches using Generator pattern
* Minimizes memory usage by yielding one batch at a time (default batch size: 500)
*
* @param gpuInstanceTypeIds - Array of database GPU instance type IDs
* @param regionIds - Array of database region IDs
* @param dbGpuInstanceMap - Map of GPU instance type ID to DB instance data
* @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of GpuPricingInput records (configurable batch size)
*
* Manual Test:
* For typical Vultr GPU instances (~35 vcg types × 20 regions = 700 records):
* - Default batch size (500): ~2 batches
* - Memory footprint: at most 500 records in memory at a time instead of all 700
* - Verify: Check logs for "Generated and upserted GPU pricing records for Vultr"
*/
private *generateVultrGpuPricingBatches(
gpuInstanceTypeIds: number[],
regionIds: number[],
dbGpuInstanceMap: Map<number, { instance_id: string }>,
rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
env?: Env
): Generator<GpuPricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: GpuPricingInput[] = [];
for (const regionId of regionIds) {
for (const gpuInstanceId of gpuInstanceTypeIds) {
const dbInstance = dbGpuInstanceMap.get(gpuInstanceId);
if (!dbInstance) {
this.logger.warn('GPU instance type not found', { gpuInstanceId });
continue;
}
const rawPlan = rawPlanMap.get(dbInstance.instance_id);
if (!rawPlan) {
this.logger.warn('Raw GPU plan data not found', { instance_id: dbInstance.instance_id });
continue;
}
// Calculate hourly price: monthly_cost / 730 hours
const hourlyPrice = rawPlan.monthly_cost / 730;
batch.push({
gpu_instance_id: gpuInstanceId,
region_id: regionId,
hourly_price: hourlyPrice,
monthly_price: rawPlan.monthly_cost,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate G8 pricing records in batches for Linode
* Similar to GPU pricing generator but for G8 instances
*/
private *generateLinodeG8PricingBatches(
g8InstanceTypeIds: number[],
regionIds: number[],
dbG8InstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<G8PricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: G8PricingInput[] = [];
for (const regionId of regionIds) {
for (const g8InstanceId of g8InstanceTypeIds) {
const dbInstance = dbG8InstanceMap.get(g8InstanceId);
if (!dbInstance) {
this.logger.warn('G8 instance type not found', { g8InstanceId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw G8 instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
g8_instance_id: g8InstanceId,
region_id: regionId,
hourly_price: rawInstance.price.hourly,
monthly_price: rawInstance.price.monthly,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate VPU pricing records in batches for Linode
* Similar to GPU pricing generator but for VPU instances
*/
private *generateLinodeVpuPricingBatches(
vpuInstanceTypeIds: number[],
regionIds: number[],
dbVpuInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<VpuPricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: VpuPricingInput[] = [];
for (const regionId of regionIds) {
for (const vpuInstanceId of vpuInstanceTypeIds) {
const dbInstance = dbVpuInstanceMap.get(vpuInstanceId);
if (!dbInstance) {
this.logger.warn('VPU instance type not found', { vpuInstanceId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw VPU instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
vpu_instance_id: vpuInstanceId,
region_id: regionId,
hourly_price: rawInstance.price.hourly,
monthly_price: rawInstance.price.monthly,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Synchronize Anvil pricing based on source provider pricing
*
* Updates anvil_pricing table with retail prices calculated from source pricing
* Formula: retail = cost × 1.21 (10% margin, then 10% VAT on top: 1.10 × 1.10 = 1.21)
*
* @param provider - Source provider name (linode, vultr, aws)
* @returns Number of anvil_pricing records updated
*/
private async syncAnvilPricing(provider: string): Promise<number> {
this.logger.info('Starting Anvil pricing sync', { provider });
try {
// Step 1: Find all anvil_regions sourced from this provider
const anvilRegionsResult = await this.repos.db
.prepare('SELECT id, source_region_id FROM anvil_regions WHERE source_provider = ?')
.bind(provider)
.all<{ id: number; source_region_id: number }>();
if (!anvilRegionsResult.success || anvilRegionsResult.results.length === 0) {
this.logger.info('No anvil_regions found for provider', { provider });
return 0;
}
const anvilRegions = anvilRegionsResult.results;
this.logger.info('Found anvil_regions', { provider, count: anvilRegions.length });
// Step 2: Find all anvil_pricing records with source_instance_id
const anvilPricingResult = await this.repos.db
.prepare(`
SELECT
ap.id,
ap.anvil_instance_id,
ap.anvil_region_id,
ap.source_instance_id,
ar.source_region_id
FROM anvil_pricing ap
JOIN anvil_regions ar ON ap.anvil_region_id = ar.id
WHERE ar.source_provider = ?
AND ap.source_instance_id IS NOT NULL
`)
.bind(provider)
.all<{
id: number;
anvil_instance_id: number;
anvil_region_id: number;
source_instance_id: number;
source_region_id: number;
}>();
if (!anvilPricingResult.success || anvilPricingResult.results.length === 0) {
this.logger.info('No anvil_pricing records found with source_instance_id', { provider });
return 0;
}
const anvilPricingRecords = anvilPricingResult.results;
this.logger.info('Found anvil_pricing records to update', {
provider,
count: anvilPricingRecords.length
});
// Step 3: Fetch source pricing data with paired conditions
// Batch queries to avoid SQLite limits (max 100 pairs per query)
const CHUNK_SIZE = 100;
const allSourcePricing: Array<{
instance_type_id: number;
region_id: number;
hourly_price: number;
monthly_price: number;
}> = [];
for (let i = 0; i < anvilPricingRecords.length; i += CHUNK_SIZE) {
const chunk = anvilPricingRecords.slice(i, i + CHUNK_SIZE);
if (chunk.length === 0) continue;
const conditions = chunk
.map(() => '(instance_type_id = ? AND region_id = ?)')
.join(' OR ');
const params = chunk.flatMap(r => [r.source_instance_id, r.source_region_id]);
const chunkResult = await this.repos.db
.prepare(`
SELECT
instance_type_id,
region_id,
hourly_price,
monthly_price
FROM pricing
WHERE ${conditions}
`)
.bind(...params)
.all<{
instance_type_id: number;
region_id: number;
hourly_price: number;
monthly_price: number;
}>();
if (chunkResult.success && chunkResult.results) {
allSourcePricing.push(...chunkResult.results);
}
}
if (allSourcePricing.length === 0) {
this.logger.warn('No source pricing data found', { provider });
return 0;
}
// Step 4: Build lookup map: `${instance_type_id}_${region_id}` → pricing
const sourcePricingMap = new Map<string, { hourly_price: number; monthly_price: number }>(
allSourcePricing.map(p => [
`${p.instance_type_id}_${p.region_id}`,
{ hourly_price: p.hourly_price, monthly_price: p.monthly_price }
])
);
// Step 5: Prepare update statements
const updateStatements: D1PreparedStatement[] = [];
for (const record of anvilPricingRecords) {
const lookupKey = `${record.source_instance_id}_${record.source_region_id}`;
const sourcePricing = sourcePricingMap.get(lookupKey);
if (!sourcePricing) {
this.logger.warn('Source pricing not found', {
anvil_pricing_id: record.id,
source_instance_id: record.source_instance_id,
source_region_id: record.source_region_id
});
continue;
}
// Calculate retail prices: cost × 1.21
const hourlyPrice = calculateRetailHourly(sourcePricing.hourly_price);
const monthlyPrice = calculateRetailMonthly(sourcePricing.monthly_price);
updateStatements.push(
this.repos.db.prepare(`
UPDATE anvil_pricing
SET
hourly_price = ?,
monthly_price = ?
WHERE id = ?
`).bind(
hourlyPrice,
monthlyPrice,
record.id
)
);
}
if (updateStatements.length === 0) {
this.logger.info('No anvil_pricing records to update', { provider });
return 0;
}
// Step 6: Execute batch update
const results = await this.repos.db.batch(updateStatements);
const successCount = results.reduce(
(sum, result) => sum + (result.meta?.changes ?? 0),
0
);
this.logger.info('Anvil pricing sync completed', {
provider,
updated: successCount,
total: updateStatements.length
});
return successCount;
} catch (error) {
this.logger.error('Anvil pricing sync failed', {
provider,
error: error instanceof Error ? error.message : String(error)
});
throw error;
}
}
/**
* Create connector for a specific provider
*
* @param provider - Provider name
* @param providerId - Database provider ID
* @returns Connector adapter instance for the provider
* @throws Error if provider is not supported
*/
private async createConnector(provider: string, providerId: number): Promise<SyncConnectorAdapter> {
switch (provider.toLowerCase()) {
case 'linode': {
const connector = new LinodeConnector(this.env);
// Cache instance types for pricing extraction
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
return {
authenticate: () => connector.initialize(),
getRegions: async () => {
const regions = await connector.fetchRegions();
return regions.map(r => connector.normalizeRegion(r, providerId));
},
getInstanceTypes: async () => {
const instances = await connector.fetchInstanceTypes();
cachedInstanceTypes = instances; // Cache for pricing
// Classification priority:
// 1. GPU (gpus > 0) → handled in getGpuInstances
// 2. VPU (id contains 'netint' or 'accelerated') → handled in getVpuInstances
// 3. G8 (id starts with 'g8-') → handled in getG8Instances
// 4. Default → regular instance_types
const regularInstances = instances.filter(i => {
if (i.gpus > 0) return false;
if (i.id.includes('netint') || i.id.includes('accelerated')) return false;
if (i.id.startsWith('g8-')) return false;
return true;
});
return regularInstances.map(i => connector.normalizeInstance(i, providerId));
},
getGpuInstances: async (): Promise<GpuInstanceInput[]> => {
// Use cached instances if available to avoid redundant API calls
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for GPU extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Filter and normalize GPU instances
const gpuInstances = cachedInstanceTypes.filter(i => i.gpus > 0);
return gpuInstances.map(i => connector.normalizeGpuInstance(i, providerId));
},
getG8Instances: async (): Promise<G8InstanceInput[]> => {
// Use cached instances if available to avoid redundant API calls
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for G8 extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Filter and normalize G8 instances (g8- prefix)
const g8Instances = cachedInstanceTypes.filter(i =>
i.id.startsWith('g8-') && (!i.gpus || i.gpus === 0)
);
return g8Instances.map(i => connector.normalizeG8Instance(i, providerId));
},
getVpuInstances: async (): Promise<VpuInstanceInput[]> => {
// Use cached instances if available to avoid redundant API calls
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for VPU extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Filter and normalize VPU instances (netint or accelerated)
const vpuInstances = cachedInstanceTypes.filter(i =>
(i.id.includes('netint') || i.id.includes('accelerated')) && (!i.gpus || i.gpus === 0)
);
return vpuInstances.map(i => connector.normalizeVpuInstance(i, providerId));
},
getPricing: async (
_instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
dbGpuMap?: Map<number, { instance_id: string }>,
dbG8Map?: Map<number, { instance_id: string }>,
dbVpuMap?: Map<number, { instance_id: string }>
): Promise<number> => {
/**
* Linode Pricing Extraction Strategy (Generator Pattern):
*
* Linode pricing is embedded in instance type data (price.hourly, price.monthly).
* Generate all region × instance combinations using generator pattern.
* GPU instances are separated and stored in gpu_pricing table.
*
* Expected volume: ~190 regular + ~10 GPU instances × 20 regions = ~4,000 pricing records
* Generator pattern with 500 records/batch (SYNC_BATCH_SIZE, clamped to 1-1000) minimizes memory usage
* Each batch is immediately persisted to database to avoid memory buildup
*
* Memory footprint: at most 500 records in memory at a time (~87% less than holding all 4,000)
*
* Manual Test:
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/linode
* 2. Verify regular pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
* 3. Verify GPU pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM gpu_pricing WHERE gpu_instance_id IN (SELECT id FROM gpu_instances WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
* 4. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'linode') LIMIT 10"
*/
// Re-fetch instance types if not cached
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for pricing extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Create lookup map for raw instance data by instance_id (API ID)
const rawInstanceMap = new Map(
cachedInstanceTypes.map(i => [i.id, i])
);
// Use provided maps or create empty ones
const gpuMap = dbGpuMap || new Map();
const g8Map = dbG8Map || new Map();
const vpuMap = dbVpuMap || new Map();
// Separate instances by type: GPU, VPU, G8, and regular
const gpuInstanceTypeIds: number[] = [];
const g8InstanceTypeIds: number[] = [];
const vpuInstanceTypeIds: number[] = [];
const regularInstanceTypeIds: number[] = [];
// Extract GPU instance IDs from gpuMap
for (const dbId of gpuMap.keys()) {
gpuInstanceTypeIds.push(dbId);
}
// Extract G8 instance IDs from g8Map
for (const dbId of g8Map.keys()) {
g8InstanceTypeIds.push(dbId);
}
// Extract VPU instance IDs from vpuMap
for (const dbId of vpuMap.keys()) {
vpuInstanceTypeIds.push(dbId);
}
// Regular instances from dbInstanceMap
for (const dbId of dbInstanceMap.keys()) {
regularInstanceTypeIds.push(dbId);
}
// Process regular instance pricing
let regularPricingCount = 0;
if (regularInstanceTypeIds.length > 0) {
const regularGenerator = this.generateLinodePricingBatches(
regularInstanceTypeIds,
regionIds,
dbInstanceMap,
rawInstanceMap,
this.env
);
for (const batch of regularGenerator) {
const batchCount = await this.repos.pricing.upsertMany(batch);
regularPricingCount += batchCount;
}
}
// Process GPU instance pricing
let gpuPricingCount = 0;
if (gpuInstanceTypeIds.length > 0) {
const gpuGenerator = this.generateLinodeGpuPricingBatches(
gpuInstanceTypeIds,
regionIds,
gpuMap,
rawInstanceMap,
this.env
);
for (const batch of gpuGenerator) {
const batchCount = await this.repos.gpuPricing.upsertMany(batch);
gpuPricingCount += batchCount;
}
}
// Process G8 instance pricing
let g8PricingCount = 0;
if (g8InstanceTypeIds.length > 0) {
const g8Generator = this.generateLinodeG8PricingBatches(
g8InstanceTypeIds,
regionIds,
g8Map,
rawInstanceMap,
this.env
);
for (const batch of g8Generator) {
const batchCount = await this.repos.g8Pricing.upsertMany(batch);
g8PricingCount += batchCount;
}
}
// Process VPU instance pricing
let vpuPricingCount = 0;
if (vpuInstanceTypeIds.length > 0) {
const vpuGenerator = this.generateLinodeVpuPricingBatches(
vpuInstanceTypeIds,
regionIds,
vpuMap,
rawInstanceMap,
this.env
);
for (const batch of vpuGenerator) {
const batchCount = await this.repos.vpuPricing.upsertMany(batch);
vpuPricingCount += batchCount;
}
}
const totalCount = regularPricingCount + gpuPricingCount + g8PricingCount + vpuPricingCount;
this.logger.info('Generated and upserted pricing records for Linode', {
regular_pricing: regularPricingCount,
gpu_pricing: gpuPricingCount,
g8_pricing: g8PricingCount,
vpu_pricing: vpuPricingCount,
total: totalCount
});
// Return total count of processed records
return totalCount;
},
};
}
case 'vultr': {
const connector = new VultrConnector(this.env);
// Cache plans for pricing extraction
let cachedPlans: Awaited<ReturnType<typeof connector.fetchPlans>> | null = null;
return {
authenticate: () => connector.initialize(),
getRegions: async () => {
const regions = await connector.fetchRegions();
return regions.map(r => connector.normalizeRegion(r, providerId));
},
getInstanceTypes: async () => {
const plans = await connector.fetchPlans();
cachedPlans = plans; // Cache for pricing
// Filter out GPU instances (vcg type)
const regularPlans = plans.filter(p => !p.id.startsWith('vcg'));
return regularPlans.map(p => connector.normalizeInstance(p, providerId));
},
getGpuInstances: async (): Promise<GpuInstanceInput[]> => {
// Use cached plans if available to avoid redundant API calls
if (!cachedPlans) {
this.logger.info('Fetching plans for GPU extraction');
cachedPlans = await connector.fetchPlans();
}
// Filter and normalize GPU instances (vcg type)
const gpuPlans = cachedPlans.filter(p => p.id.startsWith('vcg'));
return gpuPlans.map(p => connector.normalizeGpuInstance(p, providerId));
},
getPricing: async (
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
dbGpuMap?: Map<number, { instance_id: string }>
): Promise<number> => {
/**
* Vultr Pricing Extraction Strategy (Generator Pattern):
*
* Vultr pricing is embedded in plan data (monthly_cost).
* Generate all region × plan combinations using generator pattern.
*
* Expected volume: ~100 regular plans × 20 regions = ~2,000 pricing records
* ~35 GPU plans × 20 regions = ~700 GPU pricing records
* Generator pattern with 500 records/batch (SYNC_BATCH_SIZE, clamped to 1-1000) minimizes memory usage
* Each batch is immediately persisted to database to avoid memory buildup
*
* Memory footprint: at most 500 records in memory at a time instead of all ~2,700
*
* Manual Test:
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/vultr
* 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
* 3. Verify GPU pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM gpu_pricing WHERE gpu_instance_id IN (SELECT id FROM gpu_instances WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
* 4. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'vultr') LIMIT 10"
* 5. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
*/
// Re-fetch plans if not cached
if (!cachedPlans) {
this.logger.info('Fetching plans for pricing extraction');
cachedPlans = await connector.fetchPlans();
}
// Create lookup map for raw plan data by plan ID (API ID)
const rawPlanMap = new Map(
cachedPlans.map(p => [p.id, p])
);
// Process regular instance pricing
let regularPricingCount = 0;
if (instanceTypeIds.length > 0) {
const regularGenerator = this.generateVultrPricingBatches(
instanceTypeIds,
regionIds,
dbInstanceMap,
rawPlanMap,
this.env
);
for (const batch of regularGenerator) {
const batchCount = await this.repos.pricing.upsertMany(batch);
regularPricingCount += batchCount;
}
}
// Process GPU instance pricing
let gpuPricingCount = 0;
const gpuMap = dbGpuMap || new Map();
if (gpuMap.size > 0) {
const gpuInstanceTypeIds = Array.from(gpuMap.keys());
const gpuGenerator = this.generateVultrGpuPricingBatches(
gpuInstanceTypeIds,
regionIds,
gpuMap,
rawPlanMap,
this.env
);
for (const batch of gpuGenerator) {
const batchCount = await this.repos.gpuPricing.upsertMany(batch);
gpuPricingCount += batchCount;
}
}
const totalCount = regularPricingCount + gpuPricingCount;
this.logger.info('Generated and upserted pricing records for Vultr', {
regular_pricing: regularPricingCount,
gpu_pricing: gpuPricingCount,
total: totalCount
});
// Return total count of processed records
return totalCount;
},
};
}
case 'aws': {
const connector = new AWSConnector(this.env);
// Cache instance types for pricing extraction
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
return {
authenticate: () => connector.initialize(),
getRegions: async () => {
const regions = await connector.fetchRegions();
return regions.map(r => connector.normalizeRegion(r, providerId));
},
getInstanceTypes: async () => {
const instances = await connector.fetchInstanceTypes();
cachedInstanceTypes = instances; // Cache for pricing
return instances.map(i => connector.normalizeInstance(i, providerId));
},
getPricing: async (
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>
): Promise<number> => {
/**
* AWS Pricing Extraction Strategy (Generator Pattern):
*
* AWS pricing from ec2.shop is region-agnostic (same price globally).
* Generate all region × instance combinations using generator pattern.
*
* Expected volume: ~870 instances × 29 regions = ~25,230 pricing records
* Generator pattern with 500 records/batch minimizes memory usage
* Each batch is immediately persisted to database to avoid memory buildup
*
* Manual Test:
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/aws
* 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'aws'))"
* 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'aws') LIMIT 10"
* 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
*/
// Re-fetch instance types if not cached
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for pricing extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Create lookup map for raw instance data by instance_id (API ID)
const rawInstanceMap = new Map(
cachedInstanceTypes.map(i => [i.InstanceType, i])
);
// Use generator pattern for memory-efficient processing
const pricingGenerator = this.generateAWSPricingBatches(
instanceTypeIds,
regionIds,
dbInstanceMap,
rawInstanceMap
);
// Process batches incrementally
let totalCount = 0;
for (const batch of pricingGenerator) {
const batchCount = await this.repos.pricing.upsertMany(batch);
totalCount += batchCount;
}
this.logger.info('Generated and upserted pricing records for AWS', { count: totalCount });
// Return total count of processed records
return totalCount;
},
};
}
default:
throw new Error(`Unsupported provider: ${provider}`);
}
}
}