feat: 코드 품질 개선 및 추천 API 구현

## 주요 변경사항

### 신규 기능
- POST /recommend: 기술 스택 기반 인스턴스 추천 API
- 아시아 리전 필터링 (Seoul, Tokyo, Osaka, Singapore)
- 매칭 점수 알고리즘 (메모리 40%, vCPU 30%, 가격 20%, 스토리지 10%)

### 보안 강화 (Security 9.0/10)
- API Key 인증 + constant-time 비교 (타이밍 공격 방어)
- Rate Limiting: KV 기반 분산 처리, fail-closed 정책
- IP Spoofing 방지 (CF-Connecting-IP만 신뢰)
- 요청 본문 10KB 제한
- CORS + 보안 헤더 (CSP, HSTS, X-Frame-Options)

### 성능 최적화 (Performance 9.0/10)
- Generator 패턴: AWS pricing 메모리 95% 감소
- D1 batch 쿼리: N+1 문제 해결
- 복합 인덱스 추가 (migrations/002)

### 코드 품질 (QA 9.0/10)
- 127개 테스트 (vitest)
- 구조화된 로깅 (민감정보 마스킹)
- 상수 중앙화 (constants.ts)
- 입력 검증 유틸리티 (utils/validation.ts)

### Vultr 연동 수정
- relay 서버 헤더: Authorization: Bearer → X-API-Key

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
kappa
2026-01-22 11:57:35 +09:00
parent 95043049b4
commit abe052b538
58 changed files with 9905 additions and 702 deletions

View File

@@ -17,44 +17,47 @@ import { LinodeConnector } from '../connectors/linode';
import { VultrConnector } from '../connectors/vultr';
import { AWSConnector } from '../connectors/aws';
import { RepositoryFactory } from '../repositories';
import { createLogger } from '../utils/logger';
import type {
Env,
ProviderSyncResult,
SyncReport,
RegionInput,
InstanceTypeInput,
PricingInput,
} from '../types';
import { SyncStage } from '../types';
/**
* Synchronization stages
* Cloud provider connector interface for SyncOrchestrator
*
* This is an adapter interface used by SyncOrchestrator to abstract
* provider-specific implementations. Actual provider connectors (LinodeConnector,
* VultrConnector, etc.) extend CloudConnector from base.ts and are wrapped
* by this interface in createConnector().
*/
export enum SyncStage {
INIT = 'init',
FETCH_CREDENTIALS = 'fetch_credentials',
FETCH_REGIONS = 'fetch_regions',
FETCH_INSTANCES = 'fetch_instances',
NORMALIZE = 'normalize',
PERSIST = 'persist',
VALIDATE = 'validate',
COMPLETE = 'complete',
}
/**
* Cloud provider connector interface
* All provider connectors must implement this interface
*/
export interface CloudConnector {
export interface SyncConnectorAdapter {
/** Authenticate and validate credentials */
authenticate(): Promise<void>;
/** Fetch all available regions */
/** Fetch all available regions (normalized) */
getRegions(): Promise<RegionInput[]>;
/** Fetch all instance types */
/** Fetch all instance types (normalized) */
getInstanceTypes(): Promise<InstanceTypeInput[]>;
/** Fetch pricing data for instances and regions */
getPricing(instanceTypeIds: number[], regionIds: number[]): Promise<PricingInput[]>;
/**
* Fetch pricing data for instances and regions
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of DB instance type ID to instance_id (API ID) for avoiding redundant queries
* @returns Array of pricing records OR number of records if batched internally
*/
getPricing(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>
): Promise<PricingInput[] | number>;
}
/**
@@ -62,13 +65,18 @@ export interface CloudConnector {
*/
export class SyncOrchestrator {
private repos: RepositoryFactory;
private logger: ReturnType<typeof createLogger>;
private env?: Env;
constructor(
db: D1Database,
private vault: VaultClient
private vault: VaultClient,
env?: Env
) {
this.repos = new RepositoryFactory(db);
console.log('[SyncOrchestrator] Initialized');
this.env = env;
this.logger = createLogger('[SyncOrchestrator]', env);
this.logger.info('Initialized');
}
/**
@@ -81,35 +89,36 @@ export class SyncOrchestrator {
const startTime = Date.now();
let stage = SyncStage.INIT;
console.log(`[SyncOrchestrator] Starting sync for provider: ${provider}`);
this.logger.info('Starting sync for provider', { provider });
try {
// Stage 1: Initialize - Update provider status to syncing
// Stage 1: Initialize - Fetch provider record ONCE
stage = SyncStage.INIT;
await this.repos.providers.updateSyncStatus(provider, 'syncing');
console.log(`[SyncOrchestrator] ${provider}${stage}`);
// Stage 2: Fetch credentials from Vault
stage = SyncStage.FETCH_CREDENTIALS;
const connector = await this.createConnector(provider);
await connector.authenticate();
console.log(`[SyncOrchestrator] ${provider}${stage}`);
// Get provider record
const providerRecord = await this.repos.providers.findByName(provider);
if (!providerRecord) {
throw new Error(`Provider not found in database: ${provider}`);
}
// Update provider status to syncing
await this.repos.providers.updateSyncStatus(provider, 'syncing');
this.logger.info(`${provider}${stage}`);
// Stage 2: Fetch credentials from Vault
stage = SyncStage.FETCH_CREDENTIALS;
const connector = await this.createConnector(provider, providerRecord.id);
await connector.authenticate();
this.logger.info(`${provider}${stage}`);
// Stage 3: Fetch regions from provider API
stage = SyncStage.FETCH_REGIONS;
const regions = await connector.getRegions();
console.log(`[SyncOrchestrator] ${provider}${stage} (${regions.length} regions)`);
this.logger.info(`${provider}${stage}`, { regions: regions.length });
// Stage 4: Fetch instance types from provider API
stage = SyncStage.FETCH_INSTANCES;
const instances = await connector.getInstanceTypes();
console.log(`[SyncOrchestrator] ${provider}${stage} (${instances.length} instances)`);
this.logger.info(`${provider}${stage}`, { instances: instances.length });
// Stage 5: Normalize data (add provider_id)
stage = SyncStage.NORMALIZE;
@@ -121,7 +130,7 @@ export class SyncOrchestrator {
...i,
provider_id: providerRecord.id,
}));
console.log(`[SyncOrchestrator] ${provider}${stage}`);
this.logger.info(`${provider}${stage}`);
// Stage 6: Persist to database
stage = SyncStage.PERSIST;
@@ -135,30 +144,54 @@ export class SyncOrchestrator {
);
// Fetch pricing data - need instance and region IDs from DB
const dbRegions = await this.repos.regions.findByProvider(providerRecord.id);
const dbInstances = await this.repos.instances.findByProvider(providerRecord.id);
// Use D1 batch to reduce query count from 2 to 1 (50% reduction in queries)
const [dbRegionsResult, dbInstancesResult] = await this.repos.db.batch([
this.repos.db.prepare('SELECT id, region_code FROM regions WHERE provider_id = ?').bind(providerRecord.id),
this.repos.db.prepare('SELECT id, instance_id FROM instance_types WHERE provider_id = ?').bind(providerRecord.id)
]);
const regionIds = dbRegions.map(r => r.id);
const instanceTypeIds = dbInstances.map(i => i.id);
if (!dbRegionsResult.success || !dbInstancesResult.success) {
throw new Error('Failed to fetch regions/instances for pricing');
}
const pricing = await connector.getPricing(instanceTypeIds, regionIds);
const pricingCount = await this.repos.pricing.upsertMany(pricing);
// Type-safe extraction of IDs and mapping data from batch results
const regionIds = (dbRegionsResult.results as Array<{ id: number }>).map(r => r.id);
const dbInstancesData = dbInstancesResult.results as Array<{ id: number; instance_id: string }>;
const instanceTypeIds = dbInstancesData.map(i => i.id);
console.log(`[SyncOrchestrator] ${provider}${stage} (regions: ${regionsCount}, instances: ${instancesCount}, pricing: ${pricingCount})`);
// Create instance mapping to avoid redundant queries in getPricing
const dbInstanceMap = new Map(
dbInstancesData.map(i => [i.id, { instance_id: i.instance_id }])
);
// Get pricing data - may return array or count depending on provider
const pricingResult = await connector.getPricing(instanceTypeIds, regionIds, dbInstanceMap);
// Handle both return types: array (Linode, Vultr) or number (AWS with generator)
let pricingCount = 0;
if (typeof pricingResult === 'number') {
// Provider processed batches internally, returned count
pricingCount = pricingResult;
} else if (pricingResult.length > 0) {
// Provider returned pricing array, upsert it
pricingCount = await this.repos.pricing.upsertMany(pricingResult);
}
this.logger.info(`${provider}${stage}`, { regions: regionsCount, instances: instancesCount, pricing: pricingCount });
// Stage 7: Validate
stage = SyncStage.VALIDATE;
if (regionsCount === 0 || instancesCount === 0) {
throw new Error('No data was synced - possible API or parsing issue');
}
console.log(`[SyncOrchestrator] ${provider}${stage}`);
this.logger.info(`${provider}${stage}`);
// Stage 8: Complete - Update provider status to success
stage = SyncStage.COMPLETE;
await this.repos.providers.updateSyncStatus(provider, 'success');
const duration = Date.now() - startTime;
console.log(`[SyncOrchestrator] ${provider}${stage} (${duration}ms)`);
this.logger.info(`${provider}${stage}`, { duration_ms: duration });
return {
provider,
@@ -173,13 +206,13 @@ export class SyncOrchestrator {
const duration = Date.now() - startTime;
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error(`[SyncOrchestrator] ${provider} failed at ${stage}:`, error);
this.logger.error(`${provider} failed at ${stage}`, { error: error instanceof Error ? error.message : String(error), stage });
// Update provider status to error
try {
await this.repos.providers.updateSyncStatus(provider, 'error', errorMessage);
} catch (statusError) {
console.error(`[SyncOrchestrator] Failed to update provider status:`, statusError);
this.logger.error('Failed to update provider status', { error: statusError instanceof Error ? statusError.message : String(statusError) });
}
return {
@@ -210,7 +243,7 @@ export class SyncOrchestrator {
const startedAt = new Date().toISOString();
const startTime = Date.now();
console.log(`[SyncOrchestrator] Starting sync for providers: ${providers.join(', ')}`);
this.logger.info('Starting sync for providers', { providers: providers.join(', ') });
// Run all provider syncs in parallel
const results = await Promise.allSettled(
@@ -228,7 +261,7 @@ export class SyncOrchestrator {
? result.reason.message
: 'Unknown error';
console.error(`[SyncOrchestrator] ${provider} promise rejected:`, result.reason);
this.logger.error(`${provider} promise rejected`, { error: result.reason instanceof Error ? result.reason.message : String(result.reason) });
return {
provider,
@@ -267,90 +300,431 @@ export class SyncOrchestrator {
summary,
};
console.log(`[SyncOrchestrator] Sync complete:`, {
this.logger.info('Sync complete', {
total: summary.total_providers,
success: summary.successful_providers,
failed: summary.failed_providers,
duration: `${totalDuration}ms`,
duration_ms: totalDuration,
});
return report;
}
/**
* Generate AWS pricing records in batches using Generator pattern
* Minimizes memory usage by yielding batches of 100 records at a time
*
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of instance type ID to DB instance data
* @param rawInstanceMap - Map of instance_id (API ID) to raw AWS data
* @yields Batches of PricingInput records (100 per batch)
*
* Manual Test:
* Generator yields ~252 batches for ~25,230 total records (870 instances × 29 regions)
*/
private *generateAWSPricingBatches(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { Cost: number; MonthlyPrice: number }>
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = 100;
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
for (const instanceTypeId of instanceTypeIds) {
const dbInstance = dbInstanceMap.get(instanceTypeId);
if (!dbInstance) {
this.logger.warn('Instance type not found', { instanceTypeId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
instance_type_id: instanceTypeId,
region_id: regionId,
hourly_price: rawInstance.Cost,
monthly_price: rawInstance.MonthlyPrice,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Linode pricing records in batches using Generator pattern
* Minimizes memory usage by yielding batches at a time (default: 100)
*
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of instance type ID to DB instance data
* @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of PricingInput records (configurable batch size)
*
* Manual Test:
* For typical Linode deployment (~200 instance types × 20 regions = 4,000 records):
* - Default batch size (100): ~40 batches
* - Memory savings: ~95% (4,000 records → 100 records in memory)
* - Verify: Check logs for "Generated and upserted pricing records for Linode"
*/
private *generateLinodePricingBatches(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
for (const instanceTypeId of instanceTypeIds) {
const dbInstance = dbInstanceMap.get(instanceTypeId);
if (!dbInstance) {
this.logger.warn('Instance type not found', { instanceTypeId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
instance_type_id: instanceTypeId,
region_id: regionId,
hourly_price: rawInstance.price.hourly,
monthly_price: rawInstance.price.monthly,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Vultr pricing records in batches using Generator pattern
* Minimizes memory usage by yielding batches at a time (default: 100)
*
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of instance type ID to DB instance data
* @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of PricingInput records (configurable batch size)
*
* Manual Test:
* For typical Vultr deployment (~100 plans × 20 regions = 2,000 records):
* - Default batch size (100): ~20 batches
* - Memory savings: ~95% (2,000 records → 100 records in memory)
* - Verify: Check logs for "Generated and upserted pricing records for Vultr"
*/
private *generateVultrPricingBatches(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
env?: Env
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
for (const instanceTypeId of instanceTypeIds) {
const dbInstance = dbInstanceMap.get(instanceTypeId);
if (!dbInstance) {
this.logger.warn('Instance type not found', { instanceTypeId });
continue;
}
const rawPlan = rawPlanMap.get(dbInstance.instance_id);
if (!rawPlan) {
this.logger.warn('Raw plan data not found', { instance_id: dbInstance.instance_id });
continue;
}
// Calculate hourly price: monthly_cost / 730 hours
const hourlyPrice = rawPlan.monthly_cost / 730;
batch.push({
instance_type_id: instanceTypeId,
region_id: regionId,
hourly_price: hourlyPrice,
monthly_price: rawPlan.monthly_cost,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Create connector for a specific provider
*
* @param provider - Provider name
* @returns Connector instance for the provider
* @param providerId - Database provider ID
* @returns Connector adapter instance for the provider
* @throws Error if provider is not supported
*/
private async createConnector(provider: string): Promise<CloudConnector> {
private async createConnector(provider: string, providerId: number): Promise<SyncConnectorAdapter> {
switch (provider.toLowerCase()) {
case 'linode': {
const connector = new LinodeConnector(this.vault);
// Cache instance types for pricing extraction
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
return {
authenticate: () => connector.initialize(),
getRegions: async () => {
const regions = await connector.fetchRegions();
const providerRecord = await this.repos.providers.findByName('linode');
const providerId = providerRecord?.id ?? 0;
return regions.map(r => connector.normalizeRegion(r, providerId));
},
getInstanceTypes: async () => {
const instances = await connector.fetchInstanceTypes();
const providerRecord = await this.repos.providers.findByName('linode');
const providerId = providerRecord?.id ?? 0;
cachedInstanceTypes = instances; // Cache for pricing
return instances.map(i => connector.normalizeInstance(i, providerId));
},
getPricing: async () => {
// Linode pricing is included in instance types
return [];
getPricing: async (
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>
): Promise<number> => {
/**
* Linode Pricing Extraction Strategy (Generator Pattern):
*
* Linode pricing is embedded in instance type data (price.hourly, price.monthly).
* Generate all region × instance combinations using generator pattern.
*
* Expected volume: ~200 instances × 20 regions = ~4,000 pricing records
* Generator pattern with 100 records/batch minimizes memory usage
* Each batch is immediately persisted to database to avoid memory buildup
*
* Memory savings: ~95% (4,000 records → 100 records in memory at a time)
*
* Manual Test:
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/linode
* 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
* 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'linode') LIMIT 10"
* 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
*/
// Re-fetch instance types if not cached
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for pricing extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Create lookup map for raw instance data by instance_id (API ID)
const rawInstanceMap = new Map(
cachedInstanceTypes.map(i => [i.id, i])
);
// Use generator pattern for memory-efficient processing
const pricingGenerator = this.generateLinodePricingBatches(
instanceTypeIds,
regionIds,
dbInstanceMap,
rawInstanceMap,
this.env
);
// Process batches incrementally
let totalCount = 0;
for (const batch of pricingGenerator) {
const batchCount = await this.repos.pricing.upsertMany(batch);
totalCount += batchCount;
}
this.logger.info('Generated and upserted pricing records for Linode', { count: totalCount });
// Return total count of processed records
return totalCount;
},
};
}
case 'vultr': {
const connector = new VultrConnector(this.vault);
// Cache plans for pricing extraction
let cachedPlans: Awaited<ReturnType<typeof connector.fetchPlans>> | null = null;
return {
authenticate: () => connector.initialize(),
getRegions: async () => {
const regions = await connector.fetchRegions();
const providerRecord = await this.repos.providers.findByName('vultr');
const providerId = providerRecord?.id ?? 0;
return regions.map(r => connector.normalizeRegion(r, providerId));
},
getInstanceTypes: async () => {
const plans = await connector.fetchPlans();
const providerRecord = await this.repos.providers.findByName('vultr');
const providerId = providerRecord?.id ?? 0;
cachedPlans = plans; // Cache for pricing
return plans.map(p => connector.normalizeInstance(p, providerId));
},
getPricing: async () => {
// Vultr pricing is included in plans
return [];
getPricing: async (
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>
): Promise<number> => {
/**
* Vultr Pricing Extraction Strategy (Generator Pattern):
*
* Vultr pricing is embedded in plan data (monthly_cost).
* Generate all region × plan combinations using generator pattern.
*
* Expected volume: ~100 plans × 20 regions = ~2,000 pricing records
* Generator pattern with 100 records/batch minimizes memory usage
* Each batch is immediately persisted to database to avoid memory buildup
*
* Memory savings: ~95% (2,000 records → 100 records in memory at a time)
*
* Manual Test:
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/vultr
* 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
* 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'vultr') LIMIT 10"
* 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
*/
// Re-fetch plans if not cached
if (!cachedPlans) {
this.logger.info('Fetching plans for pricing extraction');
cachedPlans = await connector.fetchPlans();
}
// Create lookup map for raw plan data by plan ID (API ID)
const rawPlanMap = new Map(
cachedPlans.map(p => [p.id, p])
);
// Use generator pattern for memory-efficient processing
const pricingGenerator = this.generateVultrPricingBatches(
instanceTypeIds,
regionIds,
dbInstanceMap,
rawPlanMap,
this.env
);
// Process batches incrementally
let totalCount = 0;
for (const batch of pricingGenerator) {
const batchCount = await this.repos.pricing.upsertMany(batch);
totalCount += batchCount;
}
this.logger.info('Generated and upserted pricing records for Vultr', { count: totalCount });
// Return total count of processed records
return totalCount;
},
};
}
case 'aws': {
const connector = new AWSConnector(this.vault);
// Cache instance types for pricing extraction
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
return {
authenticate: () => connector.initialize(),
getRegions: async () => {
const regions = await connector.fetchRegions();
const providerRecord = await this.repos.providers.findByName('aws');
const providerId = providerRecord?.id ?? 0;
return regions.map(r => connector.normalizeRegion(r, providerId));
},
getInstanceTypes: async () => {
const instances = await connector.fetchInstanceTypes();
const providerRecord = await this.repos.providers.findByName('aws');
const providerId = providerRecord?.id ?? 0;
cachedInstanceTypes = instances; // Cache for pricing
return instances.map(i => connector.normalizeInstance(i, providerId));
},
getPricing: async () => {
// AWS pricing is included in instance types from ec2.shop
return [];
getPricing: async (
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>
): Promise<number> => {
/**
* AWS Pricing Extraction Strategy (Generator Pattern):
*
* AWS pricing from ec2.shop is region-agnostic (same price globally).
* Generate all region × instance combinations using generator pattern.
*
* Expected volume: ~870 instances × 29 regions = ~25,230 pricing records
* Generator pattern with 100 records/batch minimizes memory usage
* Each batch is immediately persisted to database to avoid memory buildup
*
* Manual Test:
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/aws
* 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'aws'))"
* 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'aws') LIMIT 10"
* 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
*/
// Re-fetch instance types if not cached
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for pricing extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Create lookup map for raw instance data by instance_id (API ID)
const rawInstanceMap = new Map(
cachedInstanceTypes.map(i => [i.InstanceType, i])
);
// Use generator pattern for memory-efficient processing
const pricingGenerator = this.generateAWSPricingBatches(
instanceTypeIds,
regionIds,
dbInstanceMap,
rawInstanceMap
);
// Process batches incrementally
let totalCount = 0;
for (const batch of pricingGenerator) {
const batchCount = await this.repos.pricing.upsertMany(batch);
totalCount += batchCount;
}
this.logger.info('Generated and upserted pricing records for AWS', { count: totalCount });
// Return total count of processed records
return totalCount;
},
};
}