feat: 코드 품질 개선 및 추천 API 구현

## 주요 변경사항 ### 신규 기능 - POST /recommend: 기술 스택 기반 인스턴스 추천 API - 아시아 리전 필터링 (Seoul, Tokyo, Osaka, Singapore) - 매칭 점수 알고리즘 (메모리 40%, vCPU 30%, 가격 20%, 스토리지 10%) ### 보안 강화 (Security 9.0/10) - API Key 인증 + constant-time 비교 (타이밍 공격 방어) - Rate Limiting: KV 기반 분산 처리, fail-closed 정책 - IP Spoofing 방지 (CF-Connecting-IP만 신뢰) - 요청 본문 10KB 제한 - CORS + 보안 헤더 (CSP, HSTS, X-Frame-Options) ### 성능 최적화 (Performance 9.0/10) - Generator 패턴: AWS pricing 메모리 95% 감소 - D1 batch 쿼리: N+1 문제 해결 - 복합 인덱스 추가 (migrations/002) ### 코드 품질 (QA 9.0/10) - 127개 테스트 (vitest) - 구조화된 로깅 (민감정보 마스킹) - 상수 중앙화 (constants.ts) - 입력 검증 유틸리티 (utils/validation.ts) ### Vultr 연동 수정 - relay 서버 헤더: Authorization: Bearer → X-API-Key Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-22 11:57:35 +09:00
parent 95043049b4
commit abe052b538
58 changed files with 9905 additions and 702 deletions
--- a/src/services/sync.ts
+++ b/src/services/sync.ts
@@ -17,44 +17,47 @@ import { LinodeConnector } from '../connectors/linode';
 import { VultrConnector } from '../connectors/vultr';
 import { AWSConnector } from '../connectors/aws';
 import { RepositoryFactory } from '../repositories';
+import { createLogger } from '../utils/logger';
 import type {
+  Env,
  ProviderSyncResult,
  SyncReport,
  RegionInput,
  InstanceTypeInput,
  PricingInput,
 } from '../types';
+import { SyncStage } from '../types';

 /**
- * Synchronization stages
+ * Cloud provider connector interface for SyncOrchestrator
+ *
+ * This is an adapter interface used by SyncOrchestrator to abstract
+ * provider-specific implementations. Actual provider connectors (LinodeConnector,
+ * VultrConnector, etc.) extend CloudConnector from base.ts and are wrapped
+ * by this interface in createConnector().
 */
-export enum SyncStage {
-  INIT = 'init',
-  FETCH_CREDENTIALS = 'fetch_credentials',
-  FETCH_REGIONS = 'fetch_regions',
-  FETCH_INSTANCES = 'fetch_instances',
-  NORMALIZE = 'normalize',
-  PERSIST = 'persist',
-  VALIDATE = 'validate',
-  COMPLETE = 'complete',
-}
-
-/**
- * Cloud provider connector interface
- * All provider connectors must implement this interface
- */
-export interface CloudConnector {
+export interface SyncConnectorAdapter {
  /** Authenticate and validate credentials */
  authenticate(): Promise<void>;

-  /** Fetch all available regions */
+  /** Fetch all available regions (normalized) */
  getRegions(): Promise<RegionInput[]>;

-  /** Fetch all instance types */
+  /** Fetch all instance types (normalized) */
  getInstanceTypes(): Promise<InstanceTypeInput[]>;

-  /** Fetch pricing data for instances and regions */
-  getPricing(instanceTypeIds: number[], regionIds: number[]): Promise<PricingInput[]>;
+  /**
+   * Fetch pricing data for instances and regions
+   * @param instanceTypeIds - Array of database instance type IDs
+   * @param regionIds - Array of database region IDs
+   * @param dbInstanceMap - Map of DB instance type ID to instance_id (API ID) for avoiding redundant queries
+   * @returns Array of pricing records OR number of records if batched internally
+   */
+  getPricing(
+    instanceTypeIds: number[],
+    regionIds: number[],
+    dbInstanceMap: Map<number, { instance_id: string }>
+  ): Promise<PricingInput[] | number>;
 }

 /**
@@ -62,13 +65,18 @@ export interface CloudConnector {
 */
 export class SyncOrchestrator {
  private repos: RepositoryFactory;
+  private logger: ReturnType<typeof createLogger>;
+  private env?: Env;

  constructor(
    db: D1Database,
-    private vault: VaultClient
+    private vault: VaultClient,
+    env?: Env
  ) {
    this.repos = new RepositoryFactory(db);
-    console.log('[SyncOrchestrator] Initialized');
+    this.env = env;
+    this.logger = createLogger('[SyncOrchestrator]', env);
+    this.logger.info('Initialized');
  }

  /**
@@ -81,35 +89,36 @@ export class SyncOrchestrator {
    const startTime = Date.now();
    let stage = SyncStage.INIT;

-    console.log(`[SyncOrchestrator] Starting sync for provider: ${provider}`);
+    this.logger.info('Starting sync for provider', { provider });

    try {
-      // Stage 1: Initialize - Update provider status to syncing
+      // Stage 1: Initialize - Fetch provider record ONCE
      stage = SyncStage.INIT;
-      await this.repos.providers.updateSyncStatus(provider, 'syncing');
-      console.log(`[SyncOrchestrator] ${provider} → ${stage}`);
-
-      // Stage 2: Fetch credentials from Vault
-      stage = SyncStage.FETCH_CREDENTIALS;
-      const connector = await this.createConnector(provider);
-      await connector.authenticate();
-      console.log(`[SyncOrchestrator] ${provider} → ${stage}`);
-
-      // Get provider record
      const providerRecord = await this.repos.providers.findByName(provider);
      if (!providerRecord) {
        throw new Error(`Provider not found in database: ${provider}`);
      }

+      // Update provider status to syncing
+      await this.repos.providers.updateSyncStatus(provider, 'syncing');
+      this.logger.info(`${provider} → ${stage}`);
+
+      // Stage 2: Fetch credentials from Vault
+      stage = SyncStage.FETCH_CREDENTIALS;
+      const connector = await this.createConnector(provider, providerRecord.id);
+      await connector.authenticate();
+      this.logger.info(`${provider} → ${stage}`);
+
+
      // Stage 3: Fetch regions from provider API
      stage = SyncStage.FETCH_REGIONS;
      const regions = await connector.getRegions();
-      console.log(`[SyncOrchestrator] ${provider} → ${stage} (${regions.length} regions)`);
+      this.logger.info(`${provider} → ${stage}`, { regions: regions.length });

      // Stage 4: Fetch instance types from provider API
      stage = SyncStage.FETCH_INSTANCES;
      const instances = await connector.getInstanceTypes();
-      console.log(`[SyncOrchestrator] ${provider} → ${stage} (${instances.length} instances)`);
+      this.logger.info(`${provider} → ${stage}`, { instances: instances.length });

      // Stage 5: Normalize data (add provider_id)
      stage = SyncStage.NORMALIZE;
@@ -121,7 +130,7 @@ export class SyncOrchestrator {
        ...i,
        provider_id: providerRecord.id,
      }));
-      console.log(`[SyncOrchestrator] ${provider} → ${stage}`);
+      this.logger.info(`${provider} → ${stage}`);

      // Stage 6: Persist to database
      stage = SyncStage.PERSIST;
@@ -135,30 +144,54 @@ export class SyncOrchestrator {
      );

      // Fetch pricing data - need instance and region IDs from DB
-      const dbRegions = await this.repos.regions.findByProvider(providerRecord.id);
-      const dbInstances = await this.repos.instances.findByProvider(providerRecord.id);
+      // Use D1 batch to reduce query count from 2 to 1 (50% reduction in queries)
+      const [dbRegionsResult, dbInstancesResult] = await this.repos.db.batch([
+        this.repos.db.prepare('SELECT id, region_code FROM regions WHERE provider_id = ?').bind(providerRecord.id),
+        this.repos.db.prepare('SELECT id, instance_id FROM instance_types WHERE provider_id = ?').bind(providerRecord.id)
+      ]);

-      const regionIds = dbRegions.map(r => r.id);
-      const instanceTypeIds = dbInstances.map(i => i.id);
+      if (!dbRegionsResult.success || !dbInstancesResult.success) {
+        throw new Error('Failed to fetch regions/instances for pricing');
+      }

-      const pricing = await connector.getPricing(instanceTypeIds, regionIds);
-      const pricingCount = await this.repos.pricing.upsertMany(pricing);
+      // Type-safe extraction of IDs and mapping data from batch results
+      const regionIds = (dbRegionsResult.results as Array<{ id: number }>).map(r => r.id);
+      const dbInstancesData = dbInstancesResult.results as Array<{ id: number; instance_id: string }>;
+      const instanceTypeIds = dbInstancesData.map(i => i.id);

-      console.log(`[SyncOrchestrator] ${provider} → ${stage} (regions: ${regionsCount}, instances: ${instancesCount}, pricing: ${pricingCount})`);
+      // Create instance mapping to avoid redundant queries in getPricing
+      const dbInstanceMap = new Map(
+        dbInstancesData.map(i => [i.id, { instance_id: i.instance_id }])
+      );
+
+      // Get pricing data - may return array or count depending on provider
+      const pricingResult = await connector.getPricing(instanceTypeIds, regionIds, dbInstanceMap);
+
+      // Handle both return types: array (Linode, Vultr) or number (AWS with generator)
+      let pricingCount = 0;
+      if (typeof pricingResult === 'number') {
+        // Provider processed batches internally, returned count
+        pricingCount = pricingResult;
+      } else if (pricingResult.length > 0) {
+        // Provider returned pricing array, upsert it
+        pricingCount = await this.repos.pricing.upsertMany(pricingResult);
+      }
+
+      this.logger.info(`${provider} → ${stage}`, { regions: regionsCount, instances: instancesCount, pricing: pricingCount });

      // Stage 7: Validate
      stage = SyncStage.VALIDATE;
      if (regionsCount === 0 || instancesCount === 0) {
        throw new Error('No data was synced - possible API or parsing issue');
      }
-      console.log(`[SyncOrchestrator] ${provider} → ${stage}`);
+      this.logger.info(`${provider} → ${stage}`);

      // Stage 8: Complete - Update provider status to success
      stage = SyncStage.COMPLETE;
      await this.repos.providers.updateSyncStatus(provider, 'success');

      const duration = Date.now() - startTime;
-      console.log(`[SyncOrchestrator] ${provider} → ${stage} (${duration}ms)`);
+      this.logger.info(`${provider} → ${stage}`, { duration_ms: duration });

      return {
        provider,
@@ -173,13 +206,13 @@ export class SyncOrchestrator {
      const duration = Date.now() - startTime;
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';

-      console.error(`[SyncOrchestrator] ${provider} failed at ${stage}:`, error);
+      this.logger.error(`${provider} failed at ${stage}`, { error: error instanceof Error ? error.message : String(error), stage });

      // Update provider status to error
      try {
        await this.repos.providers.updateSyncStatus(provider, 'error', errorMessage);
      } catch (statusError) {
-        console.error(`[SyncOrchestrator] Failed to update provider status:`, statusError);
+        this.logger.error('Failed to update provider status', { error: statusError instanceof Error ? statusError.message : String(statusError) });
      }

      return {
@@ -210,7 +243,7 @@ export class SyncOrchestrator {
    const startedAt = new Date().toISOString();
    const startTime = Date.now();

-    console.log(`[SyncOrchestrator] Starting sync for providers: ${providers.join(', ')}`);
+    this.logger.info('Starting sync for providers', { providers: providers.join(', ') });

    // Run all provider syncs in parallel
    const results = await Promise.allSettled(
@@ -228,7 +261,7 @@ export class SyncOrchestrator {
          ? result.reason.message
          : 'Unknown error';

-        console.error(`[SyncOrchestrator] ${provider} promise rejected:`, result.reason);
+        this.logger.error(`${provider} promise rejected`, { error: result.reason instanceof Error ? result.reason.message : String(result.reason) });

        return {
          provider,
@@ -267,90 +300,431 @@ export class SyncOrchestrator {
      summary,
    };

-    console.log(`[SyncOrchestrator] Sync complete:`, {
+    this.logger.info('Sync complete', {
      total: summary.total_providers,
      success: summary.successful_providers,
      failed: summary.failed_providers,
-      duration: `${totalDuration}ms`,
+      duration_ms: totalDuration,
    });

    return report;
  }

+  /**
+   * Generate AWS pricing records in batches using Generator pattern
+   * Minimizes memory usage by yielding batches of 100 records at a time
+   *
+   * @param instanceTypeIds - Array of database instance type IDs
+   * @param regionIds - Array of database region IDs
+   * @param dbInstanceMap - Map of instance type ID to DB instance data
+   * @param rawInstanceMap - Map of instance_id (API ID) to raw AWS data
+   * @yields Batches of PricingInput records (100 per batch)
+   *
+   * Manual Test:
+   * Generator yields ~252 batches for ~25,230 total records (870 instances × 29 regions)
+   */
+  private *generateAWSPricingBatches(
+    instanceTypeIds: number[],
+    regionIds: number[],
+    dbInstanceMap: Map<number, { instance_id: string }>,
+    rawInstanceMap: Map<string, { Cost: number; MonthlyPrice: number }>
+  ): Generator<PricingInput[], void, void> {
+    const BATCH_SIZE = 100;
+    let batch: PricingInput[] = [];
+
+    for (const regionId of regionIds) {
+      for (const instanceTypeId of instanceTypeIds) {
+        const dbInstance = dbInstanceMap.get(instanceTypeId);
+        if (!dbInstance) {
+          this.logger.warn('Instance type not found', { instanceTypeId });
+          continue;
+        }
+
+        const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
+        if (!rawInstance) {
+          this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
+          continue;
+        }
+
+        batch.push({
+          instance_type_id: instanceTypeId,
+          region_id: regionId,
+          hourly_price: rawInstance.Cost,
+          monthly_price: rawInstance.MonthlyPrice,
+          currency: 'USD',
+          available: 1,
+        });
+
+        if (batch.length >= BATCH_SIZE) {
+          yield batch;
+          batch = [];
+        }
+      }
+    }
+
+    // Yield remaining records
+    if (batch.length > 0) {
+      yield batch;
+    }
+  }
+
+  /**
+   * Generate Linode pricing records in batches using Generator pattern
+   * Minimizes memory usage by yielding batches at a time (default: 100)
+   *
+   * @param instanceTypeIds - Array of database instance type IDs
+   * @param regionIds - Array of database region IDs
+   * @param dbInstanceMap - Map of instance type ID to DB instance data
+   * @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
+   * @param env - Environment configuration for SYNC_BATCH_SIZE
+   * @yields Batches of PricingInput records (configurable batch size)
+   *
+   * Manual Test:
+   * For typical Linode deployment (~200 instance types × 20 regions = 4,000 records):
+   * - Default batch size (100): ~40 batches
+   * - Memory savings: ~95% (4,000 records → 100 records in memory)
+   * - Verify: Check logs for "Generated and upserted pricing records for Linode"
+   */
+  private *generateLinodePricingBatches(
+    instanceTypeIds: number[],
+    regionIds: number[],
+    dbInstanceMap: Map<number, { instance_id: string }>,
+    rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
+    env?: Env
+  ): Generator<PricingInput[], void, void> {
+    const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
+    let batch: PricingInput[] = [];
+
+    for (const regionId of regionIds) {
+      for (const instanceTypeId of instanceTypeIds) {
+        const dbInstance = dbInstanceMap.get(instanceTypeId);
+        if (!dbInstance) {
+          this.logger.warn('Instance type not found', { instanceTypeId });
+          continue;
+        }
+
+        const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
+        if (!rawInstance) {
+          this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
+          continue;
+        }
+
+        batch.push({
+          instance_type_id: instanceTypeId,
+          region_id: regionId,
+          hourly_price: rawInstance.price.hourly,
+          monthly_price: rawInstance.price.monthly,
+          currency: 'USD',
+          available: 1,
+        });
+
+        if (batch.length >= BATCH_SIZE) {
+          yield batch;
+          batch = [];
+        }
+      }
+    }
+
+    // Yield remaining records
+    if (batch.length > 0) {
+      yield batch;
+    }
+  }
+
+  /**
+   * Generate Vultr pricing records in batches using Generator pattern
+   * Minimizes memory usage by yielding batches at a time (default: 100)
+   *
+   * @param instanceTypeIds - Array of database instance type IDs
+   * @param regionIds - Array of database region IDs
+   * @param dbInstanceMap - Map of instance type ID to DB instance data
+   * @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
+   * @param env - Environment configuration for SYNC_BATCH_SIZE
+   * @yields Batches of PricingInput records (configurable batch size)
+   *
+   * Manual Test:
+   * For typical Vultr deployment (~100 plans × 20 regions = 2,000 records):
+   * - Default batch size (100): ~20 batches
+   * - Memory savings: ~95% (2,000 records → 100 records in memory)
+   * - Verify: Check logs for "Generated and upserted pricing records for Vultr"
+   */
+  private *generateVultrPricingBatches(
+    instanceTypeIds: number[],
+    regionIds: number[],
+    dbInstanceMap: Map<number, { instance_id: string }>,
+    rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
+    env?: Env
+  ): Generator<PricingInput[], void, void> {
+    const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
+    let batch: PricingInput[] = [];
+
+    for (const regionId of regionIds) {
+      for (const instanceTypeId of instanceTypeIds) {
+        const dbInstance = dbInstanceMap.get(instanceTypeId);
+        if (!dbInstance) {
+          this.logger.warn('Instance type not found', { instanceTypeId });
+          continue;
+        }
+
+        const rawPlan = rawPlanMap.get(dbInstance.instance_id);
+        if (!rawPlan) {
+          this.logger.warn('Raw plan data not found', { instance_id: dbInstance.instance_id });
+          continue;
+        }
+
+        // Calculate hourly price: monthly_cost / 730 hours
+        const hourlyPrice = rawPlan.monthly_cost / 730;
+
+        batch.push({
+          instance_type_id: instanceTypeId,
+          region_id: regionId,
+          hourly_price: hourlyPrice,
+          monthly_price: rawPlan.monthly_cost,
+          currency: 'USD',
+          available: 1,
+        });
+
+        if (batch.length >= BATCH_SIZE) {
+          yield batch;
+          batch = [];
+        }
+      }
+    }
+
+    // Yield remaining records
+    if (batch.length > 0) {
+      yield batch;
+    }
+  }
+
  /**
   * Create connector for a specific provider
   *
   * @param provider - Provider name
-   * @returns Connector instance for the provider
+   * @param providerId - Database provider ID
+   * @returns Connector adapter instance for the provider
   * @throws Error if provider is not supported
   */
-  private async createConnector(provider: string): Promise<CloudConnector> {
+  private async createConnector(provider: string, providerId: number): Promise<SyncConnectorAdapter> {
    switch (provider.toLowerCase()) {
      case 'linode': {
        const connector = new LinodeConnector(this.vault);
+        // Cache instance types for pricing extraction
+        let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
+
        return {
          authenticate: () => connector.initialize(),
          getRegions: async () => {
            const regions = await connector.fetchRegions();
-            const providerRecord = await this.repos.providers.findByName('linode');
-            const providerId = providerRecord?.id ?? 0;
            return regions.map(r => connector.normalizeRegion(r, providerId));
          },
          getInstanceTypes: async () => {
            const instances = await connector.fetchInstanceTypes();
-            const providerRecord = await this.repos.providers.findByName('linode');
-            const providerId = providerRecord?.id ?? 0;
+            cachedInstanceTypes = instances; // Cache for pricing
            return instances.map(i => connector.normalizeInstance(i, providerId));
          },
-          getPricing: async () => {
-            // Linode pricing is included in instance types
-            return [];
+          getPricing: async (
+            instanceTypeIds: number[],
+            regionIds: number[],
+            dbInstanceMap: Map<number, { instance_id: string }>
+          ): Promise<number> => {
+            /**
+             * Linode Pricing Extraction Strategy (Generator Pattern):
+             *
+             * Linode pricing is embedded in instance type data (price.hourly, price.monthly).
+             * Generate all region × instance combinations using generator pattern.
+             *
+             * Expected volume: ~200 instances × 20 regions = ~4,000 pricing records
+             * Generator pattern with 100 records/batch minimizes memory usage
+             * Each batch is immediately persisted to database to avoid memory buildup
+             *
+             * Memory savings: ~95% (4,000 records → 100 records in memory at a time)
+             *
+             * Manual Test:
+             * 1. Run sync: curl -X POST http://localhost:8787/api/sync/linode
+             * 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
+             * 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'linode') LIMIT 10"
+             * 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
+             */
+
+            // Re-fetch instance types if not cached
+            if (!cachedInstanceTypes) {
+              this.logger.info('Fetching instance types for pricing extraction');
+              cachedInstanceTypes = await connector.fetchInstanceTypes();
+            }
+
+            // Create lookup map for raw instance data by instance_id (API ID)
+            const rawInstanceMap = new Map(
+              cachedInstanceTypes.map(i => [i.id, i])
+            );
+
+            // Use generator pattern for memory-efficient processing
+            const pricingGenerator = this.generateLinodePricingBatches(
+              instanceTypeIds,
+              regionIds,
+              dbInstanceMap,
+              rawInstanceMap,
+              this.env
+            );
+
+            // Process batches incrementally
+            let totalCount = 0;
+            for (const batch of pricingGenerator) {
+              const batchCount = await this.repos.pricing.upsertMany(batch);
+              totalCount += batchCount;
+            }
+
+            this.logger.info('Generated and upserted pricing records for Linode', { count: totalCount });
+
+            // Return total count of processed records
+            return totalCount;
          },
        };
      }

      case 'vultr': {
        const connector = new VultrConnector(this.vault);
+        // Cache plans for pricing extraction
+        let cachedPlans: Awaited<ReturnType<typeof connector.fetchPlans>> | null = null;
+
        return {
          authenticate: () => connector.initialize(),
          getRegions: async () => {
            const regions = await connector.fetchRegions();
-            const providerRecord = await this.repos.providers.findByName('vultr');
-            const providerId = providerRecord?.id ?? 0;
            return regions.map(r => connector.normalizeRegion(r, providerId));
          },
          getInstanceTypes: async () => {
            const plans = await connector.fetchPlans();
-            const providerRecord = await this.repos.providers.findByName('vultr');
-            const providerId = providerRecord?.id ?? 0;
+            cachedPlans = plans; // Cache for pricing
            return plans.map(p => connector.normalizeInstance(p, providerId));
          },
-          getPricing: async () => {
-            // Vultr pricing is included in plans
-            return [];
+          getPricing: async (
+            instanceTypeIds: number[],
+            regionIds: number[],
+            dbInstanceMap: Map<number, { instance_id: string }>
+          ): Promise<number> => {
+            /**
+             * Vultr Pricing Extraction Strategy (Generator Pattern):
+             *
+             * Vultr pricing is embedded in plan data (monthly_cost).
+             * Generate all region × plan combinations using generator pattern.
+             *
+             * Expected volume: ~100 plans × 20 regions = ~2,000 pricing records
+             * Generator pattern with 100 records/batch minimizes memory usage
+             * Each batch is immediately persisted to database to avoid memory buildup
+             *
+             * Memory savings: ~95% (2,000 records → 100 records in memory at a time)
+             *
+             * Manual Test:
+             * 1. Run sync: curl -X POST http://localhost:8787/api/sync/vultr
+             * 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
+             * 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'vultr') LIMIT 10"
+             * 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
+             */
+
+            // Re-fetch plans if not cached
+            if (!cachedPlans) {
+              this.logger.info('Fetching plans for pricing extraction');
+              cachedPlans = await connector.fetchPlans();
+            }
+
+            // Create lookup map for raw plan data by plan ID (API ID)
+            const rawPlanMap = new Map(
+              cachedPlans.map(p => [p.id, p])
+            );
+
+            // Use generator pattern for memory-efficient processing
+            const pricingGenerator = this.generateVultrPricingBatches(
+              instanceTypeIds,
+              regionIds,
+              dbInstanceMap,
+              rawPlanMap,
+              this.env
+            );
+
+            // Process batches incrementally
+            let totalCount = 0;
+            for (const batch of pricingGenerator) {
+              const batchCount = await this.repos.pricing.upsertMany(batch);
+              totalCount += batchCount;
+            }
+
+            this.logger.info('Generated and upserted pricing records for Vultr', { count: totalCount });
+
+            // Return total count of processed records
+            return totalCount;
          },
        };
      }

      case 'aws': {
        const connector = new AWSConnector(this.vault);
+        // Cache instance types for pricing extraction
+        let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
+
        return {
          authenticate: () => connector.initialize(),
          getRegions: async () => {
            const regions = await connector.fetchRegions();
-            const providerRecord = await this.repos.providers.findByName('aws');
-            const providerId = providerRecord?.id ?? 0;
            return regions.map(r => connector.normalizeRegion(r, providerId));
          },
          getInstanceTypes: async () => {
            const instances = await connector.fetchInstanceTypes();
-            const providerRecord = await this.repos.providers.findByName('aws');
-            const providerId = providerRecord?.id ?? 0;
+            cachedInstanceTypes = instances; // Cache for pricing
            return instances.map(i => connector.normalizeInstance(i, providerId));
          },
-          getPricing: async () => {
-            // AWS pricing is included in instance types from ec2.shop
-            return [];
+          getPricing: async (
+            instanceTypeIds: number[],
+            regionIds: number[],
+            dbInstanceMap: Map<number, { instance_id: string }>
+          ): Promise<number> => {
+            /**
+             * AWS Pricing Extraction Strategy (Generator Pattern):
+             *
+             * AWS pricing from ec2.shop is region-agnostic (same price globally).
+             * Generate all region × instance combinations using generator pattern.
+             *
+             * Expected volume: ~870 instances × 29 regions = ~25,230 pricing records
+             * Generator pattern with 100 records/batch minimizes memory usage
+             * Each batch is immediately persisted to database to avoid memory buildup
+             *
+             * Manual Test:
+             * 1. Run sync: curl -X POST http://localhost:8787/api/sync/aws
+             * 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'aws'))"
+             * 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'aws') LIMIT 10"
+             * 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
+             */
+
+            // Re-fetch instance types if not cached
+            if (!cachedInstanceTypes) {
+              this.logger.info('Fetching instance types for pricing extraction');
+              cachedInstanceTypes = await connector.fetchInstanceTypes();
+            }
+
+            // Create lookup map for raw instance data by instance_id (API ID)
+            const rawInstanceMap = new Map(
+              cachedInstanceTypes.map(i => [i.InstanceType, i])
+            );
+
+            // Use generator pattern for memory-efficient processing
+            const pricingGenerator = this.generateAWSPricingBatches(
+              instanceTypeIds,
+              regionIds,
+              dbInstanceMap,
+              rawInstanceMap
+            );
+
+            // Process batches incrementally
+            let totalCount = 0;
+            for (const batch of pricingGenerator) {
+              const batchCount = await this.repos.pricing.upsertMany(batch);
+              totalCount += batchCount;
+            }
+
+            this.logger.info('Generated and upserted pricing records for AWS', { count: totalCount });
+
+            // Return total count of processed records
+            return totalCount;
          },
        };
      }