## Security Improvements - Fix timing attack in verifyApiKey with fixed 256-byte buffer - Fix sortOrder SQL injection with whitelist validation - Fix rate limiting bypass for non-Cloudflare traffic (fail-closed) - Remove stack trace exposure in error responses - Add request_id for audit trail (X-Request-ID header) - Sanitize origin header to prevent log injection - Add content-length validation for /sync endpoint (10KB limit) - Replace Math.random() with crypto.randomUUID() for sync IDs - Expand sensitive data masking patterns (8 → 18) ## Performance Improvements - Reduce rate limiter KV reads from 3 to 1 per request (66% reduction) - Increase sync batch size from 100 to 500 (80% fewer batches) - Fix health check N+1 query with efficient JOINs - Fix COUNT(*) Cartesian product with COUNT(DISTINCT) - Implement shared logger cache pattern across repositories - Add CacheService singleton pattern in recommend.ts - Add composite index for recommendation queries - Implement Anvil pricing query batching (100 per chunk) ## QA Improvements - Add BATCH_SIZE bounds validation (1-1000) - Add pagination bounds (page >= 1, MAX_OFFSET = 100000) - Add min/max range consistency validation - Add DB reference validation for singleton services - Add type guards for database result validation - Add timeout mechanism for external API calls (10-60s) - Use SUPPORTED_PROVIDERS constant instead of hardcoded list ## Removed - Remove Vault integration (using Wrangler secrets) - Remove 6-hour pricing cron (daily sync only) ## Configuration - Add idx_instance_types_specs_filter composite index - Add CORS Access-Control-Expose-Headers Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1545 lines
57 KiB
TypeScript
1545 lines
57 KiB
TypeScript
/**
|
||
* Sync Service - Orchestrates synchronization of cloud provider data
|
||
*
|
||
* Features:
|
||
* - Multi-provider synchronization (Linode, Vultr, AWS)
|
||
* - Stage-based sync process with error recovery
|
||
* - Provider status tracking and reporting
|
||
* - Batch operations for efficiency
|
||
*
|
||
* @example
|
||
* const orchestrator = new SyncOrchestrator(db, env);
|
||
* const report = await orchestrator.syncAll(['linode']);
|
||
*/
|
||
|
||
import { LinodeConnector } from '../connectors/linode';
|
||
import { VultrConnector } from '../connectors/vultr';
|
||
import { AWSConnector } from '../connectors/aws';
|
||
import { RepositoryFactory } from '../repositories';
|
||
import { createLogger } from '../utils/logger';
|
||
import { calculateRetailHourly, calculateRetailMonthly, SUPPORTED_PROVIDERS } from '../constants';
|
||
import type {
|
||
Env,
|
||
ProviderSyncResult,
|
||
SyncReport,
|
||
RegionInput,
|
||
InstanceTypeInput,
|
||
PricingInput,
|
||
GpuInstanceInput,
|
||
GpuPricingInput,
|
||
G8InstanceInput,
|
||
G8PricingInput,
|
||
VpuInstanceInput,
|
||
VpuPricingInput,
|
||
} from '../types';
|
||
import { SyncStage } from '../types';
|
||
|
||
/**
|
||
* Wraps a promise with a timeout
|
||
* @param promise - The promise to wrap
|
||
* @param ms - Timeout in milliseconds
|
||
* @param operation - Operation name for error message
|
||
* @returns Promise result if completed within timeout
|
||
* @throws Error if operation times out
|
||
*/
|
||
async function withTimeout<T>(promise: Promise<T>, ms: number, operation: string): Promise<T> {
|
||
let timeoutId: ReturnType<typeof setTimeout>;
|
||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||
timeoutId = setTimeout(() => reject(new Error(`${operation} timed out after ${ms}ms`)), ms);
|
||
});
|
||
|
||
try {
|
||
return await Promise.race([promise, timeoutPromise]);
|
||
} finally {
|
||
clearTimeout(timeoutId!);
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Cloud provider connector interface for SyncOrchestrator
|
||
*
|
||
* This is an adapter interface used by SyncOrchestrator to abstract
|
||
* provider-specific implementations. Actual provider connectors (LinodeConnector,
|
||
* VultrConnector, etc.) extend CloudConnector from base.ts and are wrapped
|
||
* by this interface in createConnector().
|
||
*/
|
||
export interface SyncConnectorAdapter {
|
||
/** Authenticate and validate credentials */
|
||
authenticate(): Promise<void>;
|
||
|
||
/** Fetch all available regions (normalized) */
|
||
getRegions(): Promise<RegionInput[]>;
|
||
|
||
/** Fetch all instance types (normalized) */
|
||
getInstanceTypes(): Promise<InstanceTypeInput[]>;
|
||
|
||
/** Fetch GPU instances (optional, only for providers with GPU support) */
|
||
getGpuInstances?(): Promise<GpuInstanceInput[]>;
|
||
|
||
/** Fetch G8 instances (optional, only for Linode) */
|
||
getG8Instances?(): Promise<G8InstanceInput[]>;
|
||
|
||
/** Fetch VPU instances (optional, only for Linode) */
|
||
getVpuInstances?(): Promise<VpuInstanceInput[]>;
|
||
|
||
/**
|
||
* Fetch pricing data for instances and regions
|
||
* @param instanceTypeIds - Array of database instance type IDs
|
||
* @param regionIds - Array of database region IDs
|
||
* @param dbInstanceMap - Map of DB instance type ID to instance_id (API ID) for avoiding redundant queries
|
||
* @param dbGpuMap - Map of GPU instance IDs (optional)
|
||
* @param dbG8Map - Map of G8 instance IDs (optional)
|
||
* @param dbVpuMap - Map of VPU instance IDs (optional)
|
||
* @returns Array of pricing records OR number of records if batched internally
|
||
*/
|
||
getPricing(
|
||
instanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbInstanceMap: Map<number, { instance_id: string }>,
|
||
dbGpuMap?: Map<number, { instance_id: string }>,
|
||
dbG8Map?: Map<number, { instance_id: string }>,
|
||
dbVpuMap?: Map<number, { instance_id: string }>
|
||
): Promise<PricingInput[] | number>;
|
||
}
|
||
|
||
/**
|
||
* Sync orchestrator for managing provider synchronization
|
||
*/
|
||
export class SyncOrchestrator {
|
||
private repos: RepositoryFactory;
|
||
private logger: ReturnType<typeof createLogger>;
|
||
|
||
constructor(
|
||
db: D1Database,
|
||
private env: Env
|
||
) {
|
||
this.repos = new RepositoryFactory(db, env);
|
||
this.logger = createLogger('[SyncOrchestrator]', env);
|
||
this.logger.info('Initialized');
|
||
}
|
||
|
||
/**
|
||
* Synchronize a single provider
|
||
*
|
||
* @param provider - Provider name (linode, vultr, aws)
|
||
* @returns Sync result with statistics and error information
|
||
*/
|
||
async syncProvider(provider: string): Promise<ProviderSyncResult> {
|
||
const startTime = Date.now();
|
||
let stage = SyncStage.INIT;
|
||
|
||
this.logger.info('Starting sync for provider', { provider });
|
||
|
||
try {
|
||
// Stage 1: Initialize - Fetch provider record ONCE
|
||
stage = SyncStage.INIT;
|
||
const providerRecord = await this.repos.providers.findByName(provider);
|
||
if (!providerRecord) {
|
||
throw new Error(`Provider not found in database: ${provider}`);
|
||
}
|
||
|
||
// Update provider status to syncing
|
||
await this.repos.providers.updateSyncStatus(provider, 'syncing');
|
||
this.logger.info(`${provider} → ${stage}`);
|
||
|
||
// Stage 2: Initialize connector and authenticate
|
||
const connector = await this.createConnector(provider, providerRecord.id);
|
||
await withTimeout(connector.authenticate(), 10000, `${provider} authentication`);
|
||
this.logger.info(`${provider} → initialized`);
|
||
|
||
|
||
// Stage 3: Fetch regions from provider API
|
||
stage = SyncStage.FETCH_REGIONS;
|
||
const regions = await withTimeout(connector.getRegions(), 15000, `${provider} fetch regions`);
|
||
this.logger.info(`${provider} → ${stage}`, { regions: regions.length });
|
||
|
||
// Stage 4: Fetch instance types from provider API
|
||
stage = SyncStage.FETCH_INSTANCES;
|
||
const instances = await withTimeout(connector.getInstanceTypes(), 30000, `${provider} fetch instances`);
|
||
this.logger.info(`${provider} → ${stage}`, { instances: instances.length });
|
||
|
||
// Stage 5: Normalize data (add provider_id)
|
||
stage = SyncStage.NORMALIZE;
|
||
const normalizedRegions = regions.map(r => ({
|
||
...r,
|
||
provider_id: providerRecord.id,
|
||
}));
|
||
const normalizedInstances = instances.map(i => ({
|
||
...i,
|
||
provider_id: providerRecord.id,
|
||
}));
|
||
this.logger.info(`${provider} → ${stage}`);
|
||
|
||
// Stage 6: Persist to database
|
||
stage = SyncStage.PERSIST;
|
||
const regionsCount = await this.repos.regions.upsertMany(
|
||
providerRecord.id,
|
||
normalizedRegions
|
||
);
|
||
|
||
// Persist regular instances (already filtered in getInstanceTypes)
|
||
const regularInstancesCount = await this.repos.instances.upsertMany(
|
||
providerRecord.id,
|
||
normalizedInstances
|
||
);
|
||
|
||
// Handle specialized instances separately for Linode and Vultr
|
||
let gpuInstancesCount = 0;
|
||
let g8InstancesCount = 0;
|
||
let vpuInstancesCount = 0;
|
||
|
||
if (provider.toLowerCase() === 'linode') {
|
||
// GPU instances
|
||
if (connector.getGpuInstances) {
|
||
const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
|
||
if (gpuInstances && gpuInstances.length > 0) {
|
||
gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
|
||
providerRecord.id,
|
||
gpuInstances
|
||
);
|
||
}
|
||
}
|
||
|
||
// G8 instances
|
||
if (connector.getG8Instances) {
|
||
const g8Instances = await withTimeout(connector.getG8Instances(), 15000, `${provider} fetch G8 instances`);
|
||
if (g8Instances && g8Instances.length > 0) {
|
||
g8InstancesCount = await this.repos.g8Instances.upsertMany(
|
||
providerRecord.id,
|
||
g8Instances
|
||
);
|
||
}
|
||
}
|
||
|
||
// VPU instances
|
||
if (connector.getVpuInstances) {
|
||
const vpuInstances = await withTimeout(connector.getVpuInstances(), 15000, `${provider} fetch VPU instances`);
|
||
if (vpuInstances && vpuInstances.length > 0) {
|
||
vpuInstancesCount = await this.repos.vpuInstances.upsertMany(
|
||
providerRecord.id,
|
||
vpuInstances
|
||
);
|
||
}
|
||
}
|
||
}
|
||
|
||
// Handle Vultr GPU instances
|
||
if (provider.toLowerCase() === 'vultr') {
|
||
if (connector.getGpuInstances) {
|
||
const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
|
||
if (gpuInstances && gpuInstances.length > 0) {
|
||
gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
|
||
providerRecord.id,
|
||
gpuInstances
|
||
);
|
||
}
|
||
}
|
||
}
|
||
|
||
const instancesCount = regularInstancesCount + gpuInstancesCount + g8InstancesCount + vpuInstancesCount;
|
||
|
||
// Fetch pricing data - need instance and region IDs from DB
|
||
// Use D1 batch to reduce query count (fetch all instance types in one batch)
|
||
const batchQueries = [
|
||
this.repos.db.prepare('SELECT id, region_code FROM regions WHERE provider_id = ?').bind(providerRecord.id),
|
||
this.repos.db.prepare('SELECT id, instance_id FROM instance_types WHERE provider_id = ?').bind(providerRecord.id),
|
||
this.repos.db.prepare('SELECT id, instance_id FROM gpu_instances WHERE provider_id = ?').bind(providerRecord.id),
|
||
this.repos.db.prepare('SELECT id, instance_id FROM g8_instances WHERE provider_id = ?').bind(providerRecord.id),
|
||
this.repos.db.prepare('SELECT id, instance_id FROM vpu_instances WHERE provider_id = ?').bind(providerRecord.id)
|
||
];
|
||
|
||
const [dbRegionsResult, dbInstancesResult, dbGpuResult, dbG8Result, dbVpuResult] = await this.repos.db.batch(batchQueries);
|
||
|
||
if (!dbRegionsResult.success || !dbInstancesResult.success) {
|
||
throw new Error('Failed to fetch regions/instances for pricing');
|
||
}
|
||
|
||
// Validate and extract region IDs
|
||
if (!Array.isArray(dbRegionsResult.results)) {
|
||
throw new Error('Unexpected database result format for regions');
|
||
}
|
||
const regionIds = dbRegionsResult.results.map((r: any) => {
|
||
if (typeof r?.id !== 'number') {
|
||
throw new Error('Invalid region id in database result');
|
||
}
|
||
return r.id;
|
||
});
|
||
|
||
// Validate and extract instance type data
|
||
if (!Array.isArray(dbInstancesResult.results)) {
|
||
throw new Error('Unexpected database result format for instances');
|
||
}
|
||
const dbInstancesData = dbInstancesResult.results.map((i: any) => {
|
||
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
|
||
throw new Error('Invalid instance data in database result');
|
||
}
|
||
return { id: i.id, instance_id: i.instance_id };
|
||
});
|
||
const instanceTypeIds = dbInstancesData.map(i => i.id);
|
||
|
||
// Create instance mapping to avoid redundant queries in getPricing
|
||
const dbInstanceMap = new Map(
|
||
dbInstancesData.map(i => [i.id, { instance_id: i.instance_id }])
|
||
);
|
||
|
||
// Create specialized instance mappings with validation
|
||
if (!Array.isArray(dbGpuResult.results)) {
|
||
throw new Error('Unexpected database result format for GPU instances');
|
||
}
|
||
const dbGpuMap = new Map(
|
||
dbGpuResult.results.map((i: any) => {
|
||
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
|
||
throw new Error('Invalid GPU instance data in database result');
|
||
}
|
||
return [i.id, { instance_id: i.instance_id }];
|
||
})
|
||
);
|
||
|
||
if (!Array.isArray(dbG8Result.results)) {
|
||
throw new Error('Unexpected database result format for G8 instances');
|
||
}
|
||
const dbG8Map = new Map(
|
||
dbG8Result.results.map((i: any) => {
|
||
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
|
||
throw new Error('Invalid G8 instance data in database result');
|
||
}
|
||
return [i.id, { instance_id: i.instance_id }];
|
||
})
|
||
);
|
||
|
||
if (!Array.isArray(dbVpuResult.results)) {
|
||
throw new Error('Unexpected database result format for VPU instances');
|
||
}
|
||
const dbVpuMap = new Map(
|
||
dbVpuResult.results.map((i: any) => {
|
||
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
|
||
throw new Error('Invalid VPU instance data in database result');
|
||
}
|
||
return [i.id, { instance_id: i.instance_id }];
|
||
})
|
||
);
|
||
|
||
// Get pricing data - may return array or count depending on provider
|
||
// Pass all instance maps for specialized pricing
|
||
const pricingResult = await withTimeout(
|
||
connector.getPricing(
|
||
instanceTypeIds,
|
||
regionIds,
|
||
dbInstanceMap,
|
||
dbGpuMap,
|
||
dbG8Map,
|
||
dbVpuMap
|
||
),
|
||
60000,
|
||
`${provider} fetch pricing`
|
||
);
|
||
|
||
// Handle both return types: array (Linode, Vultr) or number (AWS with generator)
|
||
let pricingCount = 0;
|
||
if (typeof pricingResult === 'number') {
|
||
// Provider processed batches internally, returned count
|
||
pricingCount = pricingResult;
|
||
} else if (pricingResult.length > 0) {
|
||
// Provider returned pricing array, upsert it
|
||
pricingCount = await this.repos.pricing.upsertMany(pricingResult);
|
||
}
|
||
|
||
this.logger.info(`${provider} → ${stage}`, {
|
||
regions: regionsCount,
|
||
regular_instances: regularInstancesCount,
|
||
gpu_instances: gpuInstancesCount,
|
||
g8_instances: g8InstancesCount,
|
||
vpu_instances: vpuInstancesCount,
|
||
total_instances: instancesCount,
|
||
pricing: pricingCount
|
||
});
|
||
|
||
// Stage 7: Validate
|
||
stage = SyncStage.VALIDATE;
|
||
if (regionsCount === 0 || instancesCount === 0) {
|
||
throw new Error('No data was synced - possible API or parsing issue');
|
||
}
|
||
this.logger.info(`${provider} → ${stage}`);
|
||
|
||
// Stage 8: Sync Anvil Pricing (if applicable)
|
||
stage = SyncStage.SYNC_ANVIL_PRICING;
|
||
let anvilPricingCount = 0;
|
||
try {
|
||
anvilPricingCount = await this.syncAnvilPricing(provider);
|
||
if (anvilPricingCount > 0) {
|
||
this.logger.info(`${provider} → ${stage}`, { anvil_pricing: anvilPricingCount });
|
||
}
|
||
} catch (anvilError) {
|
||
// Log error but don't fail the entire sync
|
||
this.logger.error('Anvil pricing sync failed', {
|
||
provider,
|
||
error: anvilError instanceof Error ? anvilError.message : String(anvilError)
|
||
});
|
||
}
|
||
|
||
// Stage 9: Complete - Update provider status to success
|
||
stage = SyncStage.COMPLETE;
|
||
await this.repos.providers.updateSyncStatus(provider, 'success');
|
||
|
||
const duration = Date.now() - startTime;
|
||
this.logger.info(`${provider} → ${stage}`, { duration_ms: duration });
|
||
|
||
return {
|
||
provider,
|
||
success: true,
|
||
regions_synced: regionsCount,
|
||
instances_synced: instancesCount,
|
||
pricing_synced: pricingCount,
|
||
duration_ms: duration,
|
||
};
|
||
|
||
} catch (error) {
|
||
const duration = Date.now() - startTime;
|
||
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||
|
||
this.logger.error(`${provider} failed at ${stage}`, { error: error instanceof Error ? error.message : String(error), stage });
|
||
|
||
// Update provider status to error
|
||
try {
|
||
await this.repos.providers.updateSyncStatus(provider, 'error', errorMessage);
|
||
} catch (statusError) {
|
||
this.logger.error('Failed to update provider status', { error: statusError instanceof Error ? statusError.message : String(statusError) });
|
||
}
|
||
|
||
return {
|
||
provider,
|
||
success: false,
|
||
regions_synced: 0,
|
||
instances_synced: 0,
|
||
pricing_synced: 0,
|
||
duration_ms: duration,
|
||
error: errorMessage,
|
||
error_details: {
|
||
stage,
|
||
message: errorMessage,
|
||
// Stack trace logged server-side only, not exposed to clients
|
||
},
|
||
};
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Synchronize all providers
|
||
*
|
||
* IMPORTANT: Providers are synced sequentially (not in parallel) to avoid
|
||
* exceeding Cloudflare Workers' 30-second CPU time limit. Each provider
|
||
* sync involves multiple API calls and database operations.
|
||
*
|
||
* For production deployments with large datasets, consider using
|
||
* Cloudflare Queues to process each provider as a separate job.
|
||
*
|
||
* @param providers - Array of provider names to sync (defaults to all supported providers)
|
||
* @returns Complete sync report with statistics
|
||
*/
|
||
async syncAll(providers: string[] = [...SUPPORTED_PROVIDERS]): Promise<SyncReport> {
|
||
const startedAt = new Date().toISOString();
|
||
const startTime = Date.now();
|
||
|
||
this.logger.info('Starting sequential sync for providers', { providers: providers.join(', ') });
|
||
|
||
// Run provider syncs sequentially to avoid CPU timeout
|
||
// Each provider sync is independent and can complete within time limits
|
||
const providerResults: ProviderSyncResult[] = [];
|
||
|
||
for (const provider of providers) {
|
||
try {
|
||
const result = await this.syncProvider(provider);
|
||
providerResults.push(result);
|
||
|
||
// Log progress after each provider
|
||
this.logger.info('Provider sync completed', {
|
||
provider,
|
||
success: result.success,
|
||
elapsed_ms: Date.now() - startTime
|
||
});
|
||
} catch (error) {
|
||
// Handle unexpected errors
|
||
providerResults.push({
|
||
provider,
|
||
success: false,
|
||
regions_synced: 0,
|
||
instances_synced: 0,
|
||
pricing_synced: 0,
|
||
duration_ms: 0,
|
||
error: error instanceof Error ? error.message : 'Unknown error',
|
||
});
|
||
}
|
||
}
|
||
|
||
const completedAt = new Date().toISOString();
|
||
const totalDuration = Date.now() - startTime;
|
||
|
||
// Calculate summary
|
||
const successful = providerResults.filter(r => r.success);
|
||
const failed = providerResults.filter(r => !r.success);
|
||
|
||
const summary = {
|
||
total_providers: providers.length,
|
||
successful_providers: successful.length,
|
||
failed_providers: failed.length,
|
||
total_regions: providerResults.reduce((sum, r) => sum + r.regions_synced, 0),
|
||
total_instances: providerResults.reduce((sum, r) => sum + r.instances_synced, 0),
|
||
total_pricing: providerResults.reduce((sum, r) => sum + r.pricing_synced, 0),
|
||
};
|
||
|
||
const report: SyncReport = {
|
||
success: failed.length === 0,
|
||
started_at: startedAt,
|
||
completed_at: completedAt,
|
||
total_duration_ms: totalDuration,
|
||
providers: providerResults,
|
||
summary,
|
||
};
|
||
|
||
this.logger.info('Sync complete', {
|
||
total: summary.total_providers,
|
||
success: summary.successful_providers,
|
||
failed: summary.failed_providers,
|
||
duration_ms: totalDuration,
|
||
});
|
||
|
||
return report;
|
||
}
|
||
|
||
/**
|
||
* Generate AWS pricing records in batches using Generator pattern
|
||
* Minimizes memory usage by yielding batches of 100 records at a time
|
||
*
|
||
* @param instanceTypeIds - Array of database instance type IDs
|
||
* @param regionIds - Array of database region IDs
|
||
* @param dbInstanceMap - Map of instance type ID to DB instance data
|
||
* @param rawInstanceMap - Map of instance_id (API ID) to raw AWS data
|
||
* @yields Batches of PricingInput records (100 per batch)
|
||
*
|
||
* Manual Test:
|
||
* Generator yields ~252 batches for ~25,230 total records (870 instances × 29 regions)
|
||
*/
|
||
private *generateAWSPricingBatches(
|
||
instanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbInstanceMap: Map<number, { instance_id: string }>,
|
||
rawInstanceMap: Map<string, { Cost: number; MonthlyPrice: number }>
|
||
): Generator<PricingInput[], void, void> {
|
||
const BATCH_SIZE = 500;
|
||
let batch: PricingInput[] = [];
|
||
|
||
for (const regionId of regionIds) {
|
||
for (const instanceTypeId of instanceTypeIds) {
|
||
const dbInstance = dbInstanceMap.get(instanceTypeId);
|
||
if (!dbInstance) {
|
||
this.logger.warn('Instance type not found', { instanceTypeId });
|
||
continue;
|
||
}
|
||
|
||
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
|
||
if (!rawInstance) {
|
||
this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
|
||
continue;
|
||
}
|
||
|
||
batch.push({
|
||
instance_type_id: instanceTypeId,
|
||
region_id: regionId,
|
||
hourly_price: rawInstance.Cost,
|
||
monthly_price: rawInstance.MonthlyPrice,
|
||
currency: 'USD',
|
||
available: 1,
|
||
});
|
||
|
||
if (batch.length >= BATCH_SIZE) {
|
||
yield batch;
|
||
batch = [];
|
||
}
|
||
}
|
||
}
|
||
|
||
// Yield remaining records
|
||
if (batch.length > 0) {
|
||
yield batch;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Generate Linode pricing records in batches using Generator pattern
|
||
* Minimizes memory usage by yielding batches at a time (default: 100)
|
||
*
|
||
* @param instanceTypeIds - Array of database instance type IDs
|
||
* @param regionIds - Array of database region IDs
|
||
* @param dbInstanceMap - Map of instance type ID to DB instance data
|
||
* @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
|
||
* @param env - Environment configuration for SYNC_BATCH_SIZE
|
||
* @yields Batches of PricingInput records (configurable batch size)
|
||
*
|
||
* Manual Test:
|
||
* For typical Linode deployment (~200 instance types × 20 regions = 4,000 records):
|
||
* - Default batch size (100): ~40 batches
|
||
* - Memory savings: ~95% (4,000 records → 100 records in memory)
|
||
* - Verify: Check logs for "Generated and upserted pricing records for Linode"
|
||
*/
|
||
private *generateLinodePricingBatches(
|
||
instanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbInstanceMap: Map<number, { instance_id: string }>,
|
||
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
|
||
env?: Env
|
||
): Generator<PricingInput[], void, void> {
|
||
const BATCH_SIZE = Math.min(
|
||
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
|
||
1000
|
||
);
|
||
let batch: PricingInput[] = [];
|
||
|
||
for (const regionId of regionIds) {
|
||
for (const instanceTypeId of instanceTypeIds) {
|
||
const dbInstance = dbInstanceMap.get(instanceTypeId);
|
||
if (!dbInstance) {
|
||
this.logger.warn('Instance type not found', { instanceTypeId });
|
||
continue;
|
||
}
|
||
|
||
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
|
||
if (!rawInstance) {
|
||
this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
|
||
continue;
|
||
}
|
||
|
||
batch.push({
|
||
instance_type_id: instanceTypeId,
|
||
region_id: regionId,
|
||
hourly_price: rawInstance.price.hourly,
|
||
monthly_price: rawInstance.price.monthly,
|
||
currency: 'USD',
|
||
available: 1,
|
||
});
|
||
|
||
if (batch.length >= BATCH_SIZE) {
|
||
yield batch;
|
||
batch = [];
|
||
}
|
||
}
|
||
}
|
||
|
||
// Yield remaining records
|
||
if (batch.length > 0) {
|
||
yield batch;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Generate Vultr pricing records in batches using Generator pattern
|
||
* Minimizes memory usage by yielding batches at a time (default: 100)
|
||
*
|
||
* @param instanceTypeIds - Array of database instance type IDs
|
||
* @param regionIds - Array of database region IDs
|
||
* @param dbInstanceMap - Map of instance type ID to DB instance data
|
||
* @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
|
||
* @param env - Environment configuration for SYNC_BATCH_SIZE
|
||
* @yields Batches of PricingInput records (configurable batch size)
|
||
*
|
||
* Manual Test:
|
||
* For typical Vultr deployment (~100 plans × 20 regions = 2,000 records):
|
||
* - Default batch size (100): ~20 batches
|
||
* - Memory savings: ~95% (2,000 records → 100 records in memory)
|
||
* - Verify: Check logs for "Generated and upserted pricing records for Vultr"
|
||
*/
|
||
private *generateVultrPricingBatches(
|
||
instanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbInstanceMap: Map<number, { instance_id: string }>,
|
||
rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
|
||
env?: Env
|
||
): Generator<PricingInput[], void, void> {
|
||
const BATCH_SIZE = Math.min(
|
||
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
|
||
1000
|
||
);
|
||
let batch: PricingInput[] = [];
|
||
|
||
for (const regionId of regionIds) {
|
||
for (const instanceTypeId of instanceTypeIds) {
|
||
const dbInstance = dbInstanceMap.get(instanceTypeId);
|
||
if (!dbInstance) {
|
||
this.logger.warn('Instance type not found', { instanceTypeId });
|
||
continue;
|
||
}
|
||
|
||
const rawPlan = rawPlanMap.get(dbInstance.instance_id);
|
||
if (!rawPlan) {
|
||
this.logger.warn('Raw plan data not found', { instance_id: dbInstance.instance_id });
|
||
continue;
|
||
}
|
||
|
||
// Calculate hourly price: monthly_cost / 730 hours
|
||
const hourlyPrice = rawPlan.monthly_cost / 730;
|
||
|
||
batch.push({
|
||
instance_type_id: instanceTypeId,
|
||
region_id: regionId,
|
||
hourly_price: hourlyPrice,
|
||
monthly_price: rawPlan.monthly_cost,
|
||
currency: 'USD',
|
||
available: 1,
|
||
});
|
||
|
||
if (batch.length >= BATCH_SIZE) {
|
||
yield batch;
|
||
batch = [];
|
||
}
|
||
}
|
||
}
|
||
|
||
// Yield remaining records
|
||
if (batch.length > 0) {
|
||
yield batch;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Generate Linode GPU pricing records in batches using Generator pattern
|
||
* Minimizes memory usage by yielding batches at a time (default: 100)
|
||
*
|
||
* @param gpuInstanceTypeIds - Array of database GPU instance type IDs
|
||
* @param regionIds - Array of database region IDs
|
||
* @param dbGpuInstanceMap - Map of GPU instance type ID to DB instance data
|
||
* @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
|
||
* @param env - Environment configuration for SYNC_BATCH_SIZE
|
||
* @yields Batches of GpuPricingInput records (configurable batch size)
|
||
*
|
||
* Manual Test:
|
||
* For typical Linode GPU instances (~10 GPU types × 20 regions = 200 records):
|
||
* - Default batch size (100): ~2 batches
|
||
* - Memory savings: ~50% (200 records → 100 records in memory)
|
||
* - Verify: Check logs for "Generated and upserted GPU pricing records for Linode"
|
||
*/
|
||
private *generateLinodeGpuPricingBatches(
|
||
gpuInstanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbGpuInstanceMap: Map<number, { instance_id: string }>,
|
||
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
|
||
env?: Env
|
||
): Generator<GpuPricingInput[], void, void> {
|
||
const BATCH_SIZE = Math.min(
|
||
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
|
||
1000
|
||
);
|
||
let batch: GpuPricingInput[] = [];
|
||
|
||
for (const regionId of regionIds) {
|
||
for (const gpuInstanceId of gpuInstanceTypeIds) {
|
||
const dbInstance = dbGpuInstanceMap.get(gpuInstanceId);
|
||
if (!dbInstance) {
|
||
this.logger.warn('GPU instance type not found', { gpuInstanceId });
|
||
continue;
|
||
}
|
||
|
||
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
|
||
if (!rawInstance) {
|
||
this.logger.warn('Raw GPU instance data not found', { instance_id: dbInstance.instance_id });
|
||
continue;
|
||
}
|
||
|
||
batch.push({
|
||
gpu_instance_id: gpuInstanceId,
|
||
region_id: regionId,
|
||
hourly_price: rawInstance.price.hourly,
|
||
monthly_price: rawInstance.price.monthly,
|
||
currency: 'USD',
|
||
available: 1,
|
||
});
|
||
|
||
if (batch.length >= BATCH_SIZE) {
|
||
yield batch;
|
||
batch = [];
|
||
}
|
||
}
|
||
}
|
||
|
||
// Yield remaining records
|
||
if (batch.length > 0) {
|
||
yield batch;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Generate Vultr GPU pricing records in batches using Generator pattern
|
||
* Minimizes memory usage by yielding batches at a time (default: 100)
|
||
*
|
||
* @param gpuInstanceTypeIds - Array of database GPU instance type IDs
|
||
* @param regionIds - Array of database region IDs
|
||
* @param dbGpuInstanceMap - Map of GPU instance type ID to DB instance data
|
||
* @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
|
||
* @param env - Environment configuration for SYNC_BATCH_SIZE
|
||
* @yields Batches of GpuPricingInput records (configurable batch size)
|
||
*
|
||
* Manual Test:
|
||
* For typical Vultr GPU instances (~35 vcg types × 20 regions = 700 records):
|
||
* - Default batch size (100): ~7 batches
|
||
* - Memory savings: ~85% (700 records → 100 records in memory)
|
||
* - Verify: Check logs for "Generated and upserted GPU pricing records for Vultr"
|
||
*/
|
||
private *generateVultrGpuPricingBatches(
|
||
gpuInstanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbGpuInstanceMap: Map<number, { instance_id: string }>,
|
||
rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
|
||
env?: Env
|
||
): Generator<GpuPricingInput[], void, void> {
|
||
const BATCH_SIZE = Math.min(
|
||
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
|
||
1000
|
||
);
|
||
let batch: GpuPricingInput[] = [];
|
||
|
||
for (const regionId of regionIds) {
|
||
for (const gpuInstanceId of gpuInstanceTypeIds) {
|
||
const dbInstance = dbGpuInstanceMap.get(gpuInstanceId);
|
||
if (!dbInstance) {
|
||
this.logger.warn('GPU instance type not found', { gpuInstanceId });
|
||
continue;
|
||
}
|
||
|
||
const rawPlan = rawPlanMap.get(dbInstance.instance_id);
|
||
if (!rawPlan) {
|
||
this.logger.warn('Raw GPU plan data not found', { instance_id: dbInstance.instance_id });
|
||
continue;
|
||
}
|
||
|
||
// Calculate hourly price: monthly_cost / 730 hours
|
||
const hourlyPrice = rawPlan.monthly_cost / 730;
|
||
|
||
batch.push({
|
||
gpu_instance_id: gpuInstanceId,
|
||
region_id: regionId,
|
||
hourly_price: hourlyPrice,
|
||
monthly_price: rawPlan.monthly_cost,
|
||
currency: 'USD',
|
||
available: 1,
|
||
});
|
||
|
||
if (batch.length >= BATCH_SIZE) {
|
||
yield batch;
|
||
batch = [];
|
||
}
|
||
}
|
||
}
|
||
|
||
// Yield remaining records
|
||
if (batch.length > 0) {
|
||
yield batch;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Generate G8 pricing records in batches for Linode
|
||
* Similar to GPU pricing generator but for G8 instances
|
||
*/
|
||
private *generateLinodeG8PricingBatches(
|
||
g8InstanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbG8InstanceMap: Map<number, { instance_id: string }>,
|
||
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
|
||
env?: Env
|
||
): Generator<G8PricingInput[], void, void> {
|
||
const BATCH_SIZE = Math.min(
|
||
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
|
||
1000
|
||
);
|
||
let batch: G8PricingInput[] = [];
|
||
|
||
for (const regionId of regionIds) {
|
||
for (const g8InstanceId of g8InstanceTypeIds) {
|
||
const dbInstance = dbG8InstanceMap.get(g8InstanceId);
|
||
if (!dbInstance) {
|
||
this.logger.warn('G8 instance type not found', { g8InstanceId });
|
||
continue;
|
||
}
|
||
|
||
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
|
||
if (!rawInstance) {
|
||
this.logger.warn('Raw G8 instance data not found', { instance_id: dbInstance.instance_id });
|
||
continue;
|
||
}
|
||
|
||
batch.push({
|
||
g8_instance_id: g8InstanceId,
|
||
region_id: regionId,
|
||
hourly_price: rawInstance.price.hourly,
|
||
monthly_price: rawInstance.price.monthly,
|
||
currency: 'USD',
|
||
available: 1,
|
||
});
|
||
|
||
if (batch.length >= BATCH_SIZE) {
|
||
yield batch;
|
||
batch = [];
|
||
}
|
||
}
|
||
}
|
||
|
||
// Yield remaining records
|
||
if (batch.length > 0) {
|
||
yield batch;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Generate VPU pricing records in batches for Linode
|
||
* Similar to GPU pricing generator but for VPU instances
|
||
*/
|
||
private *generateLinodeVpuPricingBatches(
|
||
vpuInstanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbVpuInstanceMap: Map<number, { instance_id: string }>,
|
||
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
|
||
env?: Env
|
||
): Generator<VpuPricingInput[], void, void> {
|
||
const BATCH_SIZE = Math.min(
|
||
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
|
||
1000
|
||
);
|
||
let batch: VpuPricingInput[] = [];
|
||
|
||
for (const regionId of regionIds) {
|
||
for (const vpuInstanceId of vpuInstanceTypeIds) {
|
||
const dbInstance = dbVpuInstanceMap.get(vpuInstanceId);
|
||
if (!dbInstance) {
|
||
this.logger.warn('VPU instance type not found', { vpuInstanceId });
|
||
continue;
|
||
}
|
||
|
||
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
|
||
if (!rawInstance) {
|
||
this.logger.warn('Raw VPU instance data not found', { instance_id: dbInstance.instance_id });
|
||
continue;
|
||
}
|
||
|
||
batch.push({
|
||
vpu_instance_id: vpuInstanceId,
|
||
region_id: regionId,
|
||
hourly_price: rawInstance.price.hourly,
|
||
monthly_price: rawInstance.price.monthly,
|
||
currency: 'USD',
|
||
available: 1,
|
||
});
|
||
|
||
if (batch.length >= BATCH_SIZE) {
|
||
yield batch;
|
||
batch = [];
|
||
}
|
||
}
|
||
}
|
||
|
||
// Yield remaining records
|
||
if (batch.length > 0) {
|
||
yield batch;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Synchronize Anvil pricing based on source provider pricing
|
||
*
|
||
* Updates anvil_pricing table with retail prices calculated from source pricing
|
||
* Formula: retail = cost × 1.21 (10% margin + 10% VAT)
|
||
*
|
||
* @param provider - Source provider name (linode, vultr, aws)
|
||
* @returns Number of anvil_pricing records updated
|
||
*/
|
||
private async syncAnvilPricing(provider: string): Promise<number> {
|
||
this.logger.info('Starting Anvil pricing sync', { provider });
|
||
|
||
try {
|
||
// Step 1: Find all anvil_regions sourced from this provider
|
||
const anvilRegionsResult = await this.repos.db
|
||
.prepare('SELECT id, source_region_id FROM anvil_regions WHERE source_provider = ?')
|
||
.bind(provider)
|
||
.all<{ id: number; source_region_id: number }>();
|
||
|
||
if (!anvilRegionsResult.success || anvilRegionsResult.results.length === 0) {
|
||
this.logger.info('No anvil_regions found for provider', { provider });
|
||
return 0;
|
||
}
|
||
|
||
const anvilRegions = anvilRegionsResult.results;
|
||
this.logger.info('Found anvil_regions', { provider, count: anvilRegions.length });
|
||
|
||
// Step 2: Find all anvil_pricing records with source_instance_id
|
||
const anvilPricingResult = await this.repos.db
|
||
.prepare(`
|
||
SELECT
|
||
ap.id,
|
||
ap.anvil_instance_id,
|
||
ap.anvil_region_id,
|
||
ap.source_instance_id,
|
||
ar.source_region_id
|
||
FROM anvil_pricing ap
|
||
JOIN anvil_regions ar ON ap.anvil_region_id = ar.id
|
||
WHERE ar.source_provider = ?
|
||
AND ap.source_instance_id IS NOT NULL
|
||
`)
|
||
.bind(provider)
|
||
.all<{
|
||
id: number;
|
||
anvil_instance_id: number;
|
||
anvil_region_id: number;
|
||
source_instance_id: number;
|
||
source_region_id: number;
|
||
}>();
|
||
|
||
if (!anvilPricingResult.success || anvilPricingResult.results.length === 0) {
|
||
this.logger.info('No anvil_pricing records found with source_instance_id', { provider });
|
||
return 0;
|
||
}
|
||
|
||
const anvilPricingRecords = anvilPricingResult.results;
|
||
this.logger.info('Found anvil_pricing records to update', {
|
||
provider,
|
||
count: anvilPricingRecords.length
|
||
});
|
||
|
||
// Step 4: Fetch source pricing data with paired conditions
|
||
// Batch queries to avoid SQLite limits (max 100 pairs per query)
|
||
const CHUNK_SIZE = 100;
|
||
const allSourcePricing: Array<{
|
||
instance_type_id: number;
|
||
region_id: number;
|
||
hourly_price: number;
|
||
monthly_price: number;
|
||
}> = [];
|
||
|
||
for (let i = 0; i < anvilPricingRecords.length; i += CHUNK_SIZE) {
|
||
const chunk = anvilPricingRecords.slice(i, i + CHUNK_SIZE);
|
||
if (chunk.length === 0) continue;
|
||
|
||
const conditions = chunk
|
||
.map(() => '(instance_type_id = ? AND region_id = ?)')
|
||
.join(' OR ');
|
||
const params = chunk.flatMap(r => [r.source_instance_id, r.source_region_id]);
|
||
|
||
const chunkResult = await this.repos.db
|
||
.prepare(`
|
||
SELECT
|
||
instance_type_id,
|
||
region_id,
|
||
hourly_price,
|
||
monthly_price
|
||
FROM pricing
|
||
WHERE ${conditions}
|
||
`)
|
||
.bind(...params)
|
||
.all<{
|
||
instance_type_id: number;
|
||
region_id: number;
|
||
hourly_price: number;
|
||
monthly_price: number;
|
||
}>();
|
||
|
||
if (chunkResult.success && chunkResult.results) {
|
||
allSourcePricing.push(...chunkResult.results);
|
||
}
|
||
}
|
||
|
||
if (allSourcePricing.length === 0) {
|
||
this.logger.warn('No source pricing data found', { provider });
|
||
return 0;
|
||
}
|
||
|
||
// Step 5: Build lookup map: `${instance_type_id}_${region_id}` → pricing
|
||
const sourcePricingMap = new Map<string, { hourly_price: number; monthly_price: number }>(
|
||
allSourcePricing.map(p => [
|
||
`${p.instance_type_id}_${p.region_id}`,
|
||
{ hourly_price: p.hourly_price, monthly_price: p.monthly_price }
|
||
])
|
||
);
|
||
|
||
// Step 6: Prepare update statements
|
||
const updateStatements: D1PreparedStatement[] = [];
|
||
|
||
for (const record of anvilPricingRecords) {
|
||
const lookupKey = `${record.source_instance_id}_${record.source_region_id}`;
|
||
const sourcePricing = sourcePricingMap.get(lookupKey);
|
||
|
||
if (!sourcePricing) {
|
||
this.logger.warn('Source pricing not found', {
|
||
anvil_pricing_id: record.id,
|
||
source_instance_id: record.source_instance_id,
|
||
source_region_id: record.source_region_id
|
||
});
|
||
continue;
|
||
}
|
||
|
||
// Calculate retail prices: cost × 1.21
|
||
const hourlyPrice = calculateRetailHourly(sourcePricing.hourly_price);
|
||
const monthlyPrice = calculateRetailMonthly(sourcePricing.monthly_price);
|
||
|
||
updateStatements.push(
|
||
this.repos.db.prepare(`
|
||
UPDATE anvil_pricing
|
||
SET
|
||
hourly_price = ?,
|
||
monthly_price = ?
|
||
WHERE id = ?
|
||
`).bind(
|
||
hourlyPrice,
|
||
monthlyPrice,
|
||
record.id
|
||
)
|
||
);
|
||
}
|
||
|
||
if (updateStatements.length === 0) {
|
||
this.logger.info('No anvil_pricing records to update', { provider });
|
||
return 0;
|
||
}
|
||
|
||
// Step 7: Execute batch update
|
||
const results = await this.repos.db.batch(updateStatements);
|
||
const successCount = results.reduce(
|
||
(sum, result) => sum + (result.meta?.changes ?? 0),
|
||
0
|
||
);
|
||
|
||
this.logger.info('Anvil pricing sync completed', {
|
||
provider,
|
||
updated: successCount,
|
||
total: updateStatements.length
|
||
});
|
||
|
||
return successCount;
|
||
|
||
} catch (error) {
|
||
this.logger.error('Anvil pricing sync failed', {
|
||
provider,
|
||
error: error instanceof Error ? error.message : String(error)
|
||
});
|
||
throw error;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Create connector for a specific provider
|
||
*
|
||
* @param provider - Provider name
|
||
* @param providerId - Database provider ID
|
||
* @returns Connector adapter instance for the provider
|
||
* @throws Error if provider is not supported
|
||
*/
|
||
private async createConnector(provider: string, providerId: number): Promise<SyncConnectorAdapter> {
|
||
switch (provider.toLowerCase()) {
|
||
case 'linode': {
|
||
const connector = new LinodeConnector(this.env);
|
||
// Cache instance types for pricing extraction
|
||
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
|
||
|
||
return {
|
||
authenticate: () => connector.initialize(),
|
||
getRegions: async () => {
|
||
const regions = await connector.fetchRegions();
|
||
return regions.map(r => connector.normalizeRegion(r, providerId));
|
||
},
|
||
getInstanceTypes: async () => {
|
||
const instances = await connector.fetchInstanceTypes();
|
||
cachedInstanceTypes = instances; // Cache for pricing
|
||
|
||
// Classification priority:
|
||
// 1. GPU (gpus > 0) → handled in getGpuInstances
|
||
// 2. VPU (id contains 'netint' or 'accelerated') → handled in getVpuInstances
|
||
// 3. G8 (id starts with 'g8-') → handled in getG8Instances
|
||
// 4. Default → regular instance_types
|
||
const regularInstances = instances.filter(i => {
|
||
if (i.gpus > 0) return false;
|
||
if (i.id.includes('netint') || i.id.includes('accelerated')) return false;
|
||
if (i.id.startsWith('g8-')) return false;
|
||
return true;
|
||
});
|
||
return regularInstances.map(i => connector.normalizeInstance(i, providerId));
|
||
},
|
||
getGpuInstances: async (): Promise<GpuInstanceInput[]> => {
|
||
// Use cached instances if available to avoid redundant API calls
|
||
if (!cachedInstanceTypes) {
|
||
this.logger.info('Fetching instance types for GPU extraction');
|
||
cachedInstanceTypes = await connector.fetchInstanceTypes();
|
||
}
|
||
|
||
// Filter and normalize GPU instances
|
||
const gpuInstances = cachedInstanceTypes.filter(i => i.gpus > 0);
|
||
return gpuInstances.map(i => connector.normalizeGpuInstance(i, providerId));
|
||
},
|
||
getG8Instances: async (): Promise<G8InstanceInput[]> => {
|
||
// Use cached instances if available to avoid redundant API calls
|
||
if (!cachedInstanceTypes) {
|
||
this.logger.info('Fetching instance types for G8 extraction');
|
||
cachedInstanceTypes = await connector.fetchInstanceTypes();
|
||
}
|
||
|
||
// Filter and normalize G8 instances (g8- prefix)
|
||
const g8Instances = cachedInstanceTypes.filter(i =>
|
||
i.id.startsWith('g8-') && (!i.gpus || i.gpus === 0)
|
||
);
|
||
return g8Instances.map(i => connector.normalizeG8Instance(i, providerId));
|
||
},
|
||
getVpuInstances: async (): Promise<VpuInstanceInput[]> => {
|
||
// Use cached instances if available to avoid redundant API calls
|
||
if (!cachedInstanceTypes) {
|
||
this.logger.info('Fetching instance types for VPU extraction');
|
||
cachedInstanceTypes = await connector.fetchInstanceTypes();
|
||
}
|
||
|
||
// Filter and normalize VPU instances (netint or accelerated)
|
||
const vpuInstances = cachedInstanceTypes.filter(i =>
|
||
(i.id.includes('netint') || i.id.includes('accelerated')) && (!i.gpus || i.gpus === 0)
|
||
);
|
||
return vpuInstances.map(i => connector.normalizeVpuInstance(i, providerId));
|
||
},
|
||
getPricing: async (
|
||
_instanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbInstanceMap: Map<number, { instance_id: string }>,
|
||
dbGpuMap?: Map<number, { instance_id: string }>,
|
||
dbG8Map?: Map<number, { instance_id: string }>,
|
||
dbVpuMap?: Map<number, { instance_id: string }>
|
||
): Promise<number> => {
|
||
/**
|
||
* Linode Pricing Extraction Strategy (Generator Pattern):
|
||
*
|
||
* Linode pricing is embedded in instance type data (price.hourly, price.monthly).
|
||
* Generate all region × instance combinations using generator pattern.
|
||
* GPU instances are separated and stored in gpu_pricing table.
|
||
*
|
||
* Expected volume: ~190 regular + ~10 GPU instances × 20 regions = ~4,000 pricing records
|
||
* Generator pattern with 100 records/batch minimizes memory usage
|
||
* Each batch is immediately persisted to database to avoid memory buildup
|
||
*
|
||
* Memory savings: ~95% (4,000 records → 100 records in memory at a time)
|
||
*
|
||
* Manual Test:
|
||
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/linode
|
||
* 2. Verify regular pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
|
||
* 3. Verify GPU pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM gpu_pricing WHERE gpu_instance_id IN (SELECT id FROM gpu_instances WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
|
||
* 4. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'linode') LIMIT 10"
|
||
*/
|
||
|
||
// Re-fetch instance types if not cached
|
||
if (!cachedInstanceTypes) {
|
||
this.logger.info('Fetching instance types for pricing extraction');
|
||
cachedInstanceTypes = await connector.fetchInstanceTypes();
|
||
}
|
||
|
||
// Create lookup map for raw instance data by instance_id (API ID)
|
||
const rawInstanceMap = new Map(
|
||
cachedInstanceTypes.map(i => [i.id, i])
|
||
);
|
||
|
||
// Use provided maps or create empty ones
|
||
const gpuMap = dbGpuMap || new Map();
|
||
const g8Map = dbG8Map || new Map();
|
||
const vpuMap = dbVpuMap || new Map();
|
||
|
||
// Separate instances by type: GPU, VPU, G8, and regular
|
||
const gpuInstanceTypeIds: number[] = [];
|
||
const g8InstanceTypeIds: number[] = [];
|
||
const vpuInstanceTypeIds: number[] = [];
|
||
const regularInstanceTypeIds: number[] = [];
|
||
|
||
// Extract GPU instance IDs from gpuMap
|
||
for (const dbId of gpuMap.keys()) {
|
||
gpuInstanceTypeIds.push(dbId);
|
||
}
|
||
|
||
// Extract G8 instance IDs from g8Map
|
||
for (const dbId of g8Map.keys()) {
|
||
g8InstanceTypeIds.push(dbId);
|
||
}
|
||
|
||
// Extract VPU instance IDs from vpuMap
|
||
for (const dbId of vpuMap.keys()) {
|
||
vpuInstanceTypeIds.push(dbId);
|
||
}
|
||
|
||
// Regular instances from dbInstanceMap
|
||
for (const dbId of dbInstanceMap.keys()) {
|
||
regularInstanceTypeIds.push(dbId);
|
||
}
|
||
|
||
// Process regular instance pricing
|
||
let regularPricingCount = 0;
|
||
if (regularInstanceTypeIds.length > 0) {
|
||
const regularGenerator = this.generateLinodePricingBatches(
|
||
regularInstanceTypeIds,
|
||
regionIds,
|
||
dbInstanceMap,
|
||
rawInstanceMap,
|
||
this.env
|
||
);
|
||
|
||
for (const batch of regularGenerator) {
|
||
const batchCount = await this.repos.pricing.upsertMany(batch);
|
||
regularPricingCount += batchCount;
|
||
}
|
||
}
|
||
|
||
// Process GPU instance pricing
|
||
let gpuPricingCount = 0;
|
||
if (gpuInstanceTypeIds.length > 0) {
|
||
const gpuGenerator = this.generateLinodeGpuPricingBatches(
|
||
gpuInstanceTypeIds,
|
||
regionIds,
|
||
gpuMap,
|
||
rawInstanceMap,
|
||
this.env
|
||
);
|
||
|
||
for (const batch of gpuGenerator) {
|
||
const batchCount = await this.repos.gpuPricing.upsertMany(batch);
|
||
gpuPricingCount += batchCount;
|
||
}
|
||
}
|
||
|
||
// Process G8 instance pricing
|
||
let g8PricingCount = 0;
|
||
if (g8InstanceTypeIds.length > 0) {
|
||
const g8Generator = this.generateLinodeG8PricingBatches(
|
||
g8InstanceTypeIds,
|
||
regionIds,
|
||
g8Map,
|
||
rawInstanceMap,
|
||
this.env
|
||
);
|
||
|
||
for (const batch of g8Generator) {
|
||
const batchCount = await this.repos.g8Pricing.upsertMany(batch);
|
||
g8PricingCount += batchCount;
|
||
}
|
||
}
|
||
|
||
// Process VPU instance pricing
|
||
let vpuPricingCount = 0;
|
||
if (vpuInstanceTypeIds.length > 0) {
|
||
const vpuGenerator = this.generateLinodeVpuPricingBatches(
|
||
vpuInstanceTypeIds,
|
||
regionIds,
|
||
vpuMap,
|
||
rawInstanceMap,
|
||
this.env
|
||
);
|
||
|
||
for (const batch of vpuGenerator) {
|
||
const batchCount = await this.repos.vpuPricing.upsertMany(batch);
|
||
vpuPricingCount += batchCount;
|
||
}
|
||
}
|
||
|
||
const totalCount = regularPricingCount + gpuPricingCount + g8PricingCount + vpuPricingCount;
|
||
this.logger.info('Generated and upserted pricing records for Linode', {
|
||
regular_pricing: regularPricingCount,
|
||
gpu_pricing: gpuPricingCount,
|
||
g8_pricing: g8PricingCount,
|
||
vpu_pricing: vpuPricingCount,
|
||
total: totalCount
|
||
});
|
||
|
||
// Return total count of processed records
|
||
return totalCount;
|
||
},
|
||
};
|
||
}
|
||
|
||
case 'vultr': {
|
||
const connector = new VultrConnector(this.env);
|
||
// Cache plans for pricing extraction
|
||
let cachedPlans: Awaited<ReturnType<typeof connector.fetchPlans>> | null = null;
|
||
|
||
return {
|
||
authenticate: () => connector.initialize(),
|
||
getRegions: async () => {
|
||
const regions = await connector.fetchRegions();
|
||
return regions.map(r => connector.normalizeRegion(r, providerId));
|
||
},
|
||
getInstanceTypes: async () => {
|
||
const plans = await connector.fetchPlans();
|
||
cachedPlans = plans; // Cache for pricing
|
||
|
||
// Filter out GPU instances (vcg type)
|
||
const regularPlans = plans.filter(p => !p.id.startsWith('vcg'));
|
||
return regularPlans.map(p => connector.normalizeInstance(p, providerId));
|
||
},
|
||
getGpuInstances: async (): Promise<GpuInstanceInput[]> => {
|
||
// Use cached plans if available to avoid redundant API calls
|
||
if (!cachedPlans) {
|
||
this.logger.info('Fetching plans for GPU extraction');
|
||
cachedPlans = await connector.fetchPlans();
|
||
}
|
||
|
||
// Filter and normalize GPU instances (vcg type)
|
||
const gpuPlans = cachedPlans.filter(p => p.id.startsWith('vcg'));
|
||
return gpuPlans.map(p => connector.normalizeGpuInstance(p, providerId));
|
||
},
|
||
getPricing: async (
|
||
instanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbInstanceMap: Map<number, { instance_id: string }>,
|
||
dbGpuMap?: Map<number, { instance_id: string }>
|
||
): Promise<number> => {
|
||
/**
|
||
* Vultr Pricing Extraction Strategy (Generator Pattern):
|
||
*
|
||
* Vultr pricing is embedded in plan data (monthly_cost).
|
||
* Generate all region × plan combinations using generator pattern.
|
||
*
|
||
* Expected volume: ~100 regular plans × 20 regions = ~2,000 pricing records
|
||
* ~35 GPU plans × 20 regions = ~700 GPU pricing records
|
||
* Generator pattern with 100 records/batch minimizes memory usage
|
||
* Each batch is immediately persisted to database to avoid memory buildup
|
||
*
|
||
* Memory savings: ~95% (2,700 records → 100 records in memory at a time)
|
||
*
|
||
* Manual Test:
|
||
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/vultr
|
||
* 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
|
||
* 3. Verify GPU pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM gpu_pricing WHERE gpu_instance_id IN (SELECT id FROM gpu_instances WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
|
||
* 4. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'vultr') LIMIT 10"
|
||
* 5. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
|
||
*/
|
||
|
||
// Re-fetch plans if not cached
|
||
if (!cachedPlans) {
|
||
this.logger.info('Fetching plans for pricing extraction');
|
||
cachedPlans = await connector.fetchPlans();
|
||
}
|
||
|
||
// Create lookup map for raw plan data by plan ID (API ID)
|
||
const rawPlanMap = new Map(
|
||
cachedPlans.map(p => [p.id, p])
|
||
);
|
||
|
||
// Process regular instance pricing
|
||
let regularPricingCount = 0;
|
||
if (instanceTypeIds.length > 0) {
|
||
const regularGenerator = this.generateVultrPricingBatches(
|
||
instanceTypeIds,
|
||
regionIds,
|
||
dbInstanceMap,
|
||
rawPlanMap,
|
||
this.env
|
||
);
|
||
|
||
for (const batch of regularGenerator) {
|
||
const batchCount = await this.repos.pricing.upsertMany(batch);
|
||
regularPricingCount += batchCount;
|
||
}
|
||
}
|
||
|
||
// Process GPU instance pricing
|
||
let gpuPricingCount = 0;
|
||
const gpuMap = dbGpuMap || new Map();
|
||
if (gpuMap.size > 0) {
|
||
const gpuInstanceTypeIds = Array.from(gpuMap.keys());
|
||
const gpuGenerator = this.generateVultrGpuPricingBatches(
|
||
gpuInstanceTypeIds,
|
||
regionIds,
|
||
gpuMap,
|
||
rawPlanMap,
|
||
this.env
|
||
);
|
||
|
||
for (const batch of gpuGenerator) {
|
||
const batchCount = await this.repos.gpuPricing.upsertMany(batch);
|
||
gpuPricingCount += batchCount;
|
||
}
|
||
}
|
||
|
||
const totalCount = regularPricingCount + gpuPricingCount;
|
||
this.logger.info('Generated and upserted pricing records for Vultr', {
|
||
regular_pricing: regularPricingCount,
|
||
gpu_pricing: gpuPricingCount,
|
||
total: totalCount
|
||
});
|
||
|
||
// Return total count of processed records
|
||
return totalCount;
|
||
},
|
||
};
|
||
}
|
||
|
||
case 'aws': {
|
||
const connector = new AWSConnector(this.env);
|
||
// Cache instance types for pricing extraction
|
||
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
|
||
|
||
return {
|
||
authenticate: () => connector.initialize(),
|
||
getRegions: async () => {
|
||
const regions = await connector.fetchRegions();
|
||
return regions.map(r => connector.normalizeRegion(r, providerId));
|
||
},
|
||
getInstanceTypes: async () => {
|
||
const instances = await connector.fetchInstanceTypes();
|
||
cachedInstanceTypes = instances; // Cache for pricing
|
||
return instances.map(i => connector.normalizeInstance(i, providerId));
|
||
},
|
||
getPricing: async (
|
||
instanceTypeIds: number[],
|
||
regionIds: number[],
|
||
dbInstanceMap: Map<number, { instance_id: string }>
|
||
): Promise<number> => {
|
||
/**
|
||
* AWS Pricing Extraction Strategy (Generator Pattern):
|
||
*
|
||
* AWS pricing from ec2.shop is region-agnostic (same price globally).
|
||
* Generate all region × instance combinations using generator pattern.
|
||
*
|
||
* Expected volume: ~870 instances × 29 regions = ~25,230 pricing records
|
||
* Generator pattern with 100 records/batch minimizes memory usage
|
||
* Each batch is immediately persisted to database to avoid memory buildup
|
||
*
|
||
* Manual Test:
|
||
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/aws
|
||
* 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'aws'))"
|
||
* 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'aws') LIMIT 10"
|
||
* 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
|
||
*/
|
||
|
||
// Re-fetch instance types if not cached
|
||
if (!cachedInstanceTypes) {
|
||
this.logger.info('Fetching instance types for pricing extraction');
|
||
cachedInstanceTypes = await connector.fetchInstanceTypes();
|
||
}
|
||
|
||
// Create lookup map for raw instance data by instance_id (API ID)
|
||
const rawInstanceMap = new Map(
|
||
cachedInstanceTypes.map(i => [i.InstanceType, i])
|
||
);
|
||
|
||
// Use generator pattern for memory-efficient processing
|
||
const pricingGenerator = this.generateAWSPricingBatches(
|
||
instanceTypeIds,
|
||
regionIds,
|
||
dbInstanceMap,
|
||
rawInstanceMap
|
||
);
|
||
|
||
// Process batches incrementally
|
||
let totalCount = 0;
|
||
for (const batch of pricingGenerator) {
|
||
const batchCount = await this.repos.pricing.upsertMany(batch);
|
||
totalCount += batchCount;
|
||
}
|
||
|
||
this.logger.info('Generated and upserted pricing records for AWS', { count: totalCount });
|
||
|
||
// Return total count of processed records
|
||
return totalCount;
|
||
},
|
||
};
|
||
}
|
||
|
||
default:
|
||
throw new Error(`Unsupported provider: ${provider}`);
|
||
}
|
||
}
|
||
}
|