cloud-server/src/services/sync.ts
kappa 3a8dd705e6 refactor: comprehensive code review fixes (security, performance, QA)
## Security Improvements
- Fix timing attack in verifyApiKey with fixed 256-byte buffer (see sketch after this list)
- Fix sortOrder SQL injection with whitelist validation
- Fix rate limiting bypass for non-Cloudflare traffic (fail-closed)
- Remove stack trace exposure in error responses
- Add request_id for audit trail (X-Request-ID header)
- Sanitize origin header to prevent log injection
- Add content-length validation for /sync endpoint (10KB limit)
- Replace Math.random() with crypto.randomUUID() for sync IDs
- Expand sensitive data masking patterns (8 → 18)
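
A minimal sketch of the constant-time check, assuming both the presented key and the stored key are folded into fixed 256-byte buffers before comparison (the buffer size comes from the note above; the helper name and padding details are illustrative, not the actual `verifyApiKey`):

```ts
// Illustrative only: compare API keys in constant time over fixed-size buffers,
// so the comparison cost does not depend on how long a matching prefix is.
function timingSafeEqualFixed(provided: string, expected: string): boolean {
  const SIZE = 256; // fixed buffer size, per the commit note
  const a = new Uint8Array(SIZE);
  const b = new Uint8Array(SIZE);
  new TextEncoder().encodeInto(provided, a); // writes at most SIZE bytes
  new TextEncoder().encodeInto(expected, b);

  // Fold in a length mismatch, then XOR every byte without early exit.
  let diff = provided.length ^ expected.length;
  for (let i = 0; i < SIZE; i++) {
    diff |= a[i] ^ b[i];
  }
  return diff === 0;
}
```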

## Performance Improvements
- Reduce rate limiter KV reads from 3 to 1 per request (66% reduction)
- Increase sync batch size from 100 to 500 (80% fewer batches)
- Fix health check N+1 query with efficient JOINs
- Fix COUNT(*) Cartesian product with COUNT(DISTINCT) (see sketch after this list)
- Implement shared logger cache pattern across repositories
- Add CacheService singleton pattern in recommend.ts
- Add composite index for recommendation queries
- Implement Anvil pricing query batching (100 per chunk)
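
A hedged sketch of the COUNT(*) fix: over a one-to-many JOIN, `COUNT(*)` counts every joined pair, while `COUNT(DISTINCT ...)` counts each parent row once. The table and column names match the schema used in sync.ts; the exact shape of the health-check query is an assumption:

```ts
// Illustrative only: the real health-check query is not shown here.
// Before: COUNT(*) over a one-to-many JOIN counts every (instance × pricing) pair.
const inflatedCount = `
  SELECT COUNT(*) AS n
  FROM instance_types it
  JOIN pricing p ON p.instance_type_id = it.id
  WHERE it.provider_id = ?`;

// After: COUNT(DISTINCT it.id) counts each instance once, however many
// pricing rows join onto it.
const distinctCount = `
  SELECT COUNT(DISTINCT it.id) AS n
  FROM instance_types it
  JOIN pricing p ON p.instance_type_id = it.id
  WHERE it.provider_id = ?`;
```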

## QA Improvements
- Add BATCH_SIZE bounds validation (1-1000) (see sketch after this list)
- Add pagination bounds (page >= 1, MAX_OFFSET = 100000)
- Add min/max range consistency validation
- Add DB reference validation for singleton services
- Add type guards for database result validation
- Add timeout mechanism for external API calls (10-60s)
- Use SUPPORTED_PROVIDERS constant instead of hardcoded list
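
A minimal sketch of the bounds validation, assuming a small clamping helper (the helper name is illustrative; the sync service applies the same `Math.min(Math.max(...))` pattern inline for `SYNC_BATCH_SIZE`):

```ts
// Illustrative helper: parse a value and clamp it to [min, max],
// falling back to a default when it is missing or not a number.
function clampInt(raw: string | undefined, fallback: number, min: number, max: number): number {
  const parsed = parseInt(raw ?? '', 10);
  return Math.min(Math.max(Number.isNaN(parsed) ? fallback : parsed, min), max);
}

// BATCH_SIZE bounds (1-1000), mirroring the pricing generators in sync.ts:
const batchSize = clampInt(undefined /* env?.SYNC_BATCH_SIZE */, 500, 1, 1000);

// Pagination bounds: page >= 1 and offset capped at MAX_OFFSET = 100000.
const MAX_OFFSET = 100000;
const page = clampInt('3' /* stand-in for a query param */, 1, 1, Number.MAX_SAFE_INTEGER);
const offset = Math.min((page - 1) * 50, MAX_OFFSET); // 50 is an assumed page size
```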

## Removed
- Remove Vault integration (using Wrangler secrets)
- Remove 6-hour pricing cron (daily sync only)

## Configuration
- Add idx_instance_types_specs_filter composite index
- Add CORS Access-Control-Expose-Headers (see sketch after this list)
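
A hedged sketch of the CORS change, assuming the exposed header is the `X-Request-ID` added for the audit trail (the exact header list and origin policy are assumptions):

```ts
// Illustrative only: let browser clients read the request id from responses.
function withCorsHeaders(response: Response, origin: string): Response {
  const headers = new Headers(response.headers);
  headers.set('Access-Control-Allow-Origin', origin);
  headers.set('Access-Control-Expose-Headers', 'X-Request-ID');
  return new Response(response.body, { status: response.status, headers });
}
```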

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-25 23:50:37 +09:00

/**
* Sync Service - Orchestrates synchronization of cloud provider data
*
* Features:
* - Multi-provider synchronization (Linode, Vultr, AWS)
* - Stage-based sync process with error recovery
* - Provider status tracking and reporting
* - Batch operations for efficiency
*
* @example
* const orchestrator = new SyncOrchestrator(db, env);
* const report = await orchestrator.syncAll(['linode']);
*/
import { LinodeConnector } from '../connectors/linode';
import { VultrConnector } from '../connectors/vultr';
import { AWSConnector } from '../connectors/aws';
import { RepositoryFactory } from '../repositories';
import { createLogger } from '../utils/logger';
import { calculateRetailHourly, calculateRetailMonthly, SUPPORTED_PROVIDERS } from '../constants';
import type {
Env,
ProviderSyncResult,
SyncReport,
RegionInput,
InstanceTypeInput,
PricingInput,
GpuInstanceInput,
GpuPricingInput,
G8InstanceInput,
G8PricingInput,
VpuInstanceInput,
VpuPricingInput,
} from '../types';
import { SyncStage } from '../types';
/**
* Wraps a promise with a timeout
* @param promise - The promise to wrap
* @param ms - Timeout in milliseconds
* @param operation - Operation name for error message
* @returns Promise result if completed within timeout
* @throws Error if operation times out
*/
async function withTimeout<T>(promise: Promise<T>, ms: number, operation: string): Promise<T> {
let timeoutId: ReturnType<typeof setTimeout>;
const timeoutPromise = new Promise<never>((_, reject) => {
timeoutId = setTimeout(() => reject(new Error(`${operation} timed out after ${ms}ms`)), ms);
});
try {
return await Promise.race([promise, timeoutPromise]);
} finally {
clearTimeout(timeoutId!);
}
}
/**
* Cloud provider connector interface for SyncOrchestrator
*
* This is an adapter interface used by SyncOrchestrator to abstract
* provider-specific implementations. Actual provider connectors (LinodeConnector,
* VultrConnector, etc.) extend CloudConnector from base.ts and are wrapped
* by this interface in createConnector().
*/
export interface SyncConnectorAdapter {
/** Authenticate and validate credentials */
authenticate(): Promise<void>;
/** Fetch all available regions (normalized) */
getRegions(): Promise<RegionInput[]>;
/** Fetch all instance types (normalized) */
getInstanceTypes(): Promise<InstanceTypeInput[]>;
/** Fetch GPU instances (optional, only for providers with GPU support) */
getGpuInstances?(): Promise<GpuInstanceInput[]>;
/** Fetch G8 instances (optional, only for Linode) */
getG8Instances?(): Promise<G8InstanceInput[]>;
/** Fetch VPU instances (optional, only for Linode) */
getVpuInstances?(): Promise<VpuInstanceInput[]>;
/**
* Fetch pricing data for instances and regions
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of DB instance type ID to instance_id (API ID) for avoiding redundant queries
* @param dbGpuMap - Map of GPU instance IDs (optional)
* @param dbG8Map - Map of G8 instance IDs (optional)
* @param dbVpuMap - Map of VPU instance IDs (optional)
* @returns Array of pricing records OR number of records if batched internally
*/
getPricing(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
dbGpuMap?: Map<number, { instance_id: string }>,
dbG8Map?: Map<number, { instance_id: string }>,
dbVpuMap?: Map<number, { instance_id: string }>
): Promise<PricingInput[] | number>;
}
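/*
 * Example (illustrative, not used by the service): a minimal in-memory stub
 * satisfying SyncConnectorAdapter, e.g. for unit tests. Field values and the
 * partial casts are placeholders for whatever RegionInput / InstanceTypeInput /
 * PricingInput actually require. Returning an array from getPricing means
 * SyncOrchestrator upserts the records itself; returning a number instead
 * signals the connector already persisted its batches and only reports a count.
 *
 * const stubAdapter: SyncConnectorAdapter = {
 *   authenticate: async () => {},
 *   getRegions: async () => [{ region_code: 'stub-1' } as RegionInput],
 *   getInstanceTypes: async () => [{ instance_id: 'stub-small' } as InstanceTypeInput],
 *   getPricing: async (instanceTypeIds, regionIds) =>
 *     instanceTypeIds.flatMap(instance_type_id =>
 *       regionIds.map(region_id => ({
 *         instance_type_id,
 *         region_id,
 *         hourly_price: 0.01,
 *         monthly_price: 7.3,
 *         currency: 'USD',
 *         available: 1,
 *       } as PricingInput))
 *     ),
 * };
 */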
/**
* Sync orchestrator for managing provider synchronization
*/
export class SyncOrchestrator {
private repos: RepositoryFactory;
private logger: ReturnType<typeof createLogger>;
constructor(
db: D1Database,
private env: Env
) {
this.repos = new RepositoryFactory(db, env);
this.logger = createLogger('[SyncOrchestrator]', env);
this.logger.info('Initialized');
}
/**
* Synchronize a single provider
*
* @param provider - Provider name (linode, vultr, aws)
* @returns Sync result with statistics and error information
*/
async syncProvider(provider: string): Promise<ProviderSyncResult> {
const startTime = Date.now();
let stage = SyncStage.INIT;
this.logger.info('Starting sync for provider', { provider });
try {
// Stage 1: Initialize - Fetch provider record ONCE
stage = SyncStage.INIT;
const providerRecord = await this.repos.providers.findByName(provider);
if (!providerRecord) {
throw new Error(`Provider not found in database: ${provider}`);
}
// Update provider status to syncing
await this.repos.providers.updateSyncStatus(provider, 'syncing');
this.logger.info(`${provider}${stage}`);
// Stage 2: Initialize connector and authenticate
const connector = await this.createConnector(provider, providerRecord.id);
await withTimeout(connector.authenticate(), 10000, `${provider} authentication`);
this.logger.info(`${provider} → initialized`);
// Stage 3: Fetch regions from provider API
stage = SyncStage.FETCH_REGIONS;
const regions = await withTimeout(connector.getRegions(), 15000, `${provider} fetch regions`);
this.logger.info(`${provider}${stage}`, { regions: regions.length });
// Stage 4: Fetch instance types from provider API
stage = SyncStage.FETCH_INSTANCES;
const instances = await withTimeout(connector.getInstanceTypes(), 30000, `${provider} fetch instances`);
this.logger.info(`${provider}${stage}`, { instances: instances.length });
// Stage 5: Normalize data (add provider_id)
stage = SyncStage.NORMALIZE;
const normalizedRegions = regions.map(r => ({
...r,
provider_id: providerRecord.id,
}));
const normalizedInstances = instances.map(i => ({
...i,
provider_id: providerRecord.id,
}));
this.logger.info(`${provider}${stage}`);
// Stage 6: Persist to database
stage = SyncStage.PERSIST;
const regionsCount = await this.repos.regions.upsertMany(
providerRecord.id,
normalizedRegions
);
// Persist regular instances (already filtered in getInstanceTypes)
const regularInstancesCount = await this.repos.instances.upsertMany(
providerRecord.id,
normalizedInstances
);
// Handle specialized instances separately for Linode and Vultr
let gpuInstancesCount = 0;
let g8InstancesCount = 0;
let vpuInstancesCount = 0;
if (provider.toLowerCase() === 'linode') {
// GPU instances
if (connector.getGpuInstances) {
const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
if (gpuInstances && gpuInstances.length > 0) {
gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
providerRecord.id,
gpuInstances
);
}
}
// G8 instances
if (connector.getG8Instances) {
const g8Instances = await withTimeout(connector.getG8Instances(), 15000, `${provider} fetch G8 instances`);
if (g8Instances && g8Instances.length > 0) {
g8InstancesCount = await this.repos.g8Instances.upsertMany(
providerRecord.id,
g8Instances
);
}
}
// VPU instances
if (connector.getVpuInstances) {
const vpuInstances = await withTimeout(connector.getVpuInstances(), 15000, `${provider} fetch VPU instances`);
if (vpuInstances && vpuInstances.length > 0) {
vpuInstancesCount = await this.repos.vpuInstances.upsertMany(
providerRecord.id,
vpuInstances
);
}
}
}
// Handle Vultr GPU instances
if (provider.toLowerCase() === 'vultr') {
if (connector.getGpuInstances) {
const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
if (gpuInstances && gpuInstances.length > 0) {
gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
providerRecord.id,
gpuInstances
);
}
}
}
const instancesCount = regularInstancesCount + gpuInstancesCount + g8InstancesCount + vpuInstancesCount;
// Fetch pricing data - need instance and region IDs from DB
// Use D1 batch to reduce query count (fetch all instance types in one batch)
const batchQueries = [
this.repos.db.prepare('SELECT id, region_code FROM regions WHERE provider_id = ?').bind(providerRecord.id),
this.repos.db.prepare('SELECT id, instance_id FROM instance_types WHERE provider_id = ?').bind(providerRecord.id),
this.repos.db.prepare('SELECT id, instance_id FROM gpu_instances WHERE provider_id = ?').bind(providerRecord.id),
this.repos.db.prepare('SELECT id, instance_id FROM g8_instances WHERE provider_id = ?').bind(providerRecord.id),
this.repos.db.prepare('SELECT id, instance_id FROM vpu_instances WHERE provider_id = ?').bind(providerRecord.id)
];
const [dbRegionsResult, dbInstancesResult, dbGpuResult, dbG8Result, dbVpuResult] = await this.repos.db.batch(batchQueries);
if (!dbRegionsResult.success || !dbInstancesResult.success) {
throw new Error('Failed to fetch regions/instances for pricing');
}
// Validate and extract region IDs
if (!Array.isArray(dbRegionsResult.results)) {
throw new Error('Unexpected database result format for regions');
}
const regionIds = dbRegionsResult.results.map((r: any) => {
if (typeof r?.id !== 'number') {
throw new Error('Invalid region id in database result');
}
return r.id;
});
// Validate and extract instance type data
if (!Array.isArray(dbInstancesResult.results)) {
throw new Error('Unexpected database result format for instances');
}
const dbInstancesData = dbInstancesResult.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid instance data in database result');
}
return { id: i.id, instance_id: i.instance_id };
});
const instanceTypeIds = dbInstancesData.map(i => i.id);
// Create instance mapping to avoid redundant queries in getPricing
const dbInstanceMap = new Map(
dbInstancesData.map(i => [i.id, { instance_id: i.instance_id }])
);
// Create specialized instance mappings with validation
if (!Array.isArray(dbGpuResult.results)) {
throw new Error('Unexpected database result format for GPU instances');
}
const dbGpuMap = new Map(
dbGpuResult.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid GPU instance data in database result');
}
return [i.id, { instance_id: i.instance_id }];
})
);
if (!Array.isArray(dbG8Result.results)) {
throw new Error('Unexpected database result format for G8 instances');
}
const dbG8Map = new Map(
dbG8Result.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid G8 instance data in database result');
}
return [i.id, { instance_id: i.instance_id }];
})
);
if (!Array.isArray(dbVpuResult.results)) {
throw new Error('Unexpected database result format for VPU instances');
}
const dbVpuMap = new Map(
dbVpuResult.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid VPU instance data in database result');
}
return [i.id, { instance_id: i.instance_id }];
})
);
// Get pricing data - may return array or count depending on provider
// Pass all instance maps for specialized pricing
const pricingResult = await withTimeout(
connector.getPricing(
instanceTypeIds,
regionIds,
dbInstanceMap,
dbGpuMap,
dbG8Map,
dbVpuMap
),
60000,
`${provider} fetch pricing`
);
// Handle both return types: a pricing array (upserted here) or a count when the connector batches and upserts internally (the current Linode, Vultr, and AWS adapters all return a count)
let pricingCount = 0;
if (typeof pricingResult === 'number') {
// Provider processed batches internally, returned count
pricingCount = pricingResult;
} else if (pricingResult.length > 0) {
// Provider returned pricing array, upsert it
pricingCount = await this.repos.pricing.upsertMany(pricingResult);
}
this.logger.info(`${provider}${stage}`, {
regions: regionsCount,
regular_instances: regularInstancesCount,
gpu_instances: gpuInstancesCount,
g8_instances: g8InstancesCount,
vpu_instances: vpuInstancesCount,
total_instances: instancesCount,
pricing: pricingCount
});
// Stage 7: Validate
stage = SyncStage.VALIDATE;
if (regionsCount === 0 || instancesCount === 0) {
throw new Error('No data was synced - possible API or parsing issue');
}
this.logger.info(`${provider}${stage}`);
// Stage 8: Sync Anvil Pricing (if applicable)
stage = SyncStage.SYNC_ANVIL_PRICING;
let anvilPricingCount = 0;
try {
anvilPricingCount = await this.syncAnvilPricing(provider);
if (anvilPricingCount > 0) {
this.logger.info(`${provider}${stage}`, { anvil_pricing: anvilPricingCount });
}
} catch (anvilError) {
// Log error but don't fail the entire sync
this.logger.error('Anvil pricing sync failed', {
provider,
error: anvilError instanceof Error ? anvilError.message : String(anvilError)
});
}
// Stage 9: Complete - Update provider status to success
stage = SyncStage.COMPLETE;
await this.repos.providers.updateSyncStatus(provider, 'success');
const duration = Date.now() - startTime;
this.logger.info(`${provider}${stage}`, { duration_ms: duration });
return {
provider,
success: true,
regions_synced: regionsCount,
instances_synced: instancesCount,
pricing_synced: pricingCount,
duration_ms: duration,
};
} catch (error) {
const duration = Date.now() - startTime;
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
this.logger.error(`${provider} failed at ${stage}`, { error: error instanceof Error ? error.message : String(error), stage });
// Update provider status to error
try {
await this.repos.providers.updateSyncStatus(provider, 'error', errorMessage);
} catch (statusError) {
this.logger.error('Failed to update provider status', { error: statusError instanceof Error ? statusError.message : String(statusError) });
}
return {
provider,
success: false,
regions_synced: 0,
instances_synced: 0,
pricing_synced: 0,
duration_ms: duration,
error: errorMessage,
error_details: {
stage,
message: errorMessage,
// Stack trace logged server-side only, not exposed to clients
},
};
}
}
/**
* Synchronize all providers
*
* IMPORTANT: Providers are synced sequentially (not in parallel) to avoid
* exceeding Cloudflare Workers' 30-second CPU time limit. Each provider
* sync involves multiple API calls and database operations.
*
* For production deployments with large datasets, consider using
* Cloudflare Queues to process each provider as a separate job.
*
* @param providers - Array of provider names to sync (defaults to all supported providers)
* @returns Complete sync report with statistics
*/
async syncAll(providers: string[] = [...SUPPORTED_PROVIDERS]): Promise<SyncReport> {
const startedAt = new Date().toISOString();
const startTime = Date.now();
this.logger.info('Starting sequential sync for providers', { providers: providers.join(', ') });
// Run provider syncs sequentially to avoid CPU timeout
// Each provider sync is independent and can complete within time limits
const providerResults: ProviderSyncResult[] = [];
for (const provider of providers) {
try {
const result = await this.syncProvider(provider);
providerResults.push(result);
// Log progress after each provider
this.logger.info('Provider sync completed', {
provider,
success: result.success,
elapsed_ms: Date.now() - startTime
});
} catch (error) {
// Handle unexpected errors
providerResults.push({
provider,
success: false,
regions_synced: 0,
instances_synced: 0,
pricing_synced: 0,
duration_ms: 0,
error: error instanceof Error ? error.message : 'Unknown error',
});
}
}
const completedAt = new Date().toISOString();
const totalDuration = Date.now() - startTime;
// Calculate summary
const successful = providerResults.filter(r => r.success);
const failed = providerResults.filter(r => !r.success);
const summary = {
total_providers: providers.length,
successful_providers: successful.length,
failed_providers: failed.length,
total_regions: providerResults.reduce((sum, r) => sum + r.regions_synced, 0),
total_instances: providerResults.reduce((sum, r) => sum + r.instances_synced, 0),
total_pricing: providerResults.reduce((sum, r) => sum + r.pricing_synced, 0),
};
const report: SyncReport = {
success: failed.length === 0,
started_at: startedAt,
completed_at: completedAt,
total_duration_ms: totalDuration,
providers: providerResults,
summary,
};
this.logger.info('Sync complete', {
total: summary.total_providers,
success: summary.successful_providers,
failed: summary.failed_providers,
duration_ms: totalDuration,
});
return report;
}
/**
* Generate AWS pricing records in batches using Generator pattern
* Minimizes memory usage by yielding batches of 500 records at a time
*
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of instance type ID to DB instance data
* @param rawInstanceMap - Map of instance_id (API ID) to raw AWS data
* @yields Batches of PricingInput records (500 per batch)
*
* Manual Test:
* Generator yields ~51 batches for ~25,230 total records (870 instances × 29 regions)
*/
private *generateAWSPricingBatches(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { Cost: number; MonthlyPrice: number }>
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = 500;
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
for (const instanceTypeId of instanceTypeIds) {
const dbInstance = dbInstanceMap.get(instanceTypeId);
if (!dbInstance) {
this.logger.warn('Instance type not found', { instanceTypeId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
instance_type_id: instanceTypeId,
region_id: regionId,
hourly_price: rawInstance.Cost,
monthly_price: rawInstance.MonthlyPrice,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Linode pricing records in batches using Generator pattern
* Minimizes memory usage by yielding one batch at a time (default batch size: 500)
*
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of instance type ID to DB instance data
* @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of PricingInput records (configurable batch size)
*
* Manual Test:
* For typical Linode deployment (~200 instance types × 20 regions = 4,000 records):
* - Default batch size (500): ~8 batches
* - Memory footprint: at most 500 records in memory at a time (~87% less than holding all 4,000)
* - Verify: Check logs for "Generated and upserted pricing records for Linode"
*/
private *generateLinodePricingBatches(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
for (const instanceTypeId of instanceTypeIds) {
const dbInstance = dbInstanceMap.get(instanceTypeId);
if (!dbInstance) {
this.logger.warn('Instance type not found', { instanceTypeId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
instance_type_id: instanceTypeId,
region_id: regionId,
hourly_price: rawInstance.price.hourly,
monthly_price: rawInstance.price.monthly,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Vultr pricing records in batches using Generator pattern
* Minimizes memory usage by yielding one batch at a time (default batch size: 500)
*
* @param instanceTypeIds - Array of database instance type IDs
* @param regionIds - Array of database region IDs
* @param dbInstanceMap - Map of instance type ID to DB instance data
* @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of PricingInput records (configurable batch size)
*
* Manual Test:
* For typical Vultr deployment (~100 plans × 20 regions = 2,000 records):
* - Default batch size (500): ~4 batches
* - Memory footprint: at most 500 records in memory at a time (~75% less than holding all 2,000)
* - Verify: Check logs for "Generated and upserted pricing records for Vultr"
*/
private *generateVultrPricingBatches(
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
env?: Env
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
for (const instanceTypeId of instanceTypeIds) {
const dbInstance = dbInstanceMap.get(instanceTypeId);
if (!dbInstance) {
this.logger.warn('Instance type not found', { instanceTypeId });
continue;
}
const rawPlan = rawPlanMap.get(dbInstance.instance_id);
if (!rawPlan) {
this.logger.warn('Raw plan data not found', { instance_id: dbInstance.instance_id });
continue;
}
// Calculate hourly price: monthly_cost / 730 hours
const hourlyPrice = rawPlan.monthly_cost / 730;
batch.push({
instance_type_id: instanceTypeId,
region_id: regionId,
hourly_price: hourlyPrice,
monthly_price: rawPlan.monthly_cost,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Linode GPU pricing records in batches using Generator pattern
* Minimizes memory usage by yielding one batch at a time (default batch size: 500)
*
* @param gpuInstanceTypeIds - Array of database GPU instance type IDs
* @param regionIds - Array of database region IDs
* @param dbGpuInstanceMap - Map of GPU instance type ID to DB instance data
* @param rawInstanceMap - Map of instance_id (API ID) to raw Linode data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of GpuPricingInput records (configurable batch size)
*
* Manual Test:
* For typical Linode GPU instances (~10 GPU types × 20 regions = 200 records):
* - Default batch size (500): 1 batch (all 200 records fit in a single batch)
* - Verify: Check logs for "Generated and upserted GPU pricing records for Linode"
*/
private *generateLinodeGpuPricingBatches(
gpuInstanceTypeIds: number[],
regionIds: number[],
dbGpuInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<GpuPricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: GpuPricingInput[] = [];
for (const regionId of regionIds) {
for (const gpuInstanceId of gpuInstanceTypeIds) {
const dbInstance = dbGpuInstanceMap.get(gpuInstanceId);
if (!dbInstance) {
this.logger.warn('GPU instance type not found', { gpuInstanceId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw GPU instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
gpu_instance_id: gpuInstanceId,
region_id: regionId,
hourly_price: rawInstance.price.hourly,
monthly_price: rawInstance.price.monthly,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate Vultr GPU pricing records in batches using Generator pattern
* Minimizes memory usage by yielding one batch at a time (default batch size: 500)
*
* @param gpuInstanceTypeIds - Array of database GPU instance type IDs
* @param regionIds - Array of database region IDs
* @param dbGpuInstanceMap - Map of GPU instance type ID to DB instance data
* @param rawPlanMap - Map of plan_id (API ID) to raw Vultr plan data
* @param env - Environment configuration for SYNC_BATCH_SIZE
* @yields Batches of GpuPricingInput records (configurable batch size)
*
* Manual Test:
* For typical Vultr GPU instances (~35 vcg types × 20 regions = 700 records):
* - Default batch size (500): ~2 batches
* - Memory footprint: at most 500 records in memory at a time instead of all 700
* - Verify: Check logs for "Generated and upserted GPU pricing records for Vultr"
*/
private *generateVultrGpuPricingBatches(
gpuInstanceTypeIds: number[],
regionIds: number[],
dbGpuInstanceMap: Map<number, { instance_id: string }>,
rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
env?: Env
): Generator<GpuPricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: GpuPricingInput[] = [];
for (const regionId of regionIds) {
for (const gpuInstanceId of gpuInstanceTypeIds) {
const dbInstance = dbGpuInstanceMap.get(gpuInstanceId);
if (!dbInstance) {
this.logger.warn('GPU instance type not found', { gpuInstanceId });
continue;
}
const rawPlan = rawPlanMap.get(dbInstance.instance_id);
if (!rawPlan) {
this.logger.warn('Raw GPU plan data not found', { instance_id: dbInstance.instance_id });
continue;
}
// Calculate hourly price: monthly_cost / 730 hours
const hourlyPrice = rawPlan.monthly_cost / 730;
batch.push({
gpu_instance_id: gpuInstanceId,
region_id: regionId,
hourly_price: hourlyPrice,
monthly_price: rawPlan.monthly_cost,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate G8 pricing records in batches for Linode
* Similar to GPU pricing generator but for G8 instances
*/
private *generateLinodeG8PricingBatches(
g8InstanceTypeIds: number[],
regionIds: number[],
dbG8InstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<G8PricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: G8PricingInput[] = [];
for (const regionId of regionIds) {
for (const g8InstanceId of g8InstanceTypeIds) {
const dbInstance = dbG8InstanceMap.get(g8InstanceId);
if (!dbInstance) {
this.logger.warn('G8 instance type not found', { g8InstanceId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw G8 instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
g8_instance_id: g8InstanceId,
region_id: regionId,
hourly_price: rawInstance.price.hourly,
monthly_price: rawInstance.price.monthly,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Generate VPU pricing records in batches for Linode
* Similar to GPU pricing generator but for VPU instances
*/
private *generateLinodeVpuPricingBatches(
vpuInstanceTypeIds: number[],
regionIds: number[],
dbVpuInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<VpuPricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: VpuPricingInput[] = [];
for (const regionId of regionIds) {
for (const vpuInstanceId of vpuInstanceTypeIds) {
const dbInstance = dbVpuInstanceMap.get(vpuInstanceId);
if (!dbInstance) {
this.logger.warn('VPU instance type not found', { vpuInstanceId });
continue;
}
const rawInstance = rawInstanceMap.get(dbInstance.instance_id);
if (!rawInstance) {
this.logger.warn('Raw VPU instance data not found', { instance_id: dbInstance.instance_id });
continue;
}
batch.push({
vpu_instance_id: vpuInstanceId,
region_id: regionId,
hourly_price: rawInstance.price.hourly,
monthly_price: rawInstance.price.monthly,
currency: 'USD',
available: 1,
});
if (batch.length >= BATCH_SIZE) {
yield batch;
batch = [];
}
}
}
// Yield remaining records
if (batch.length > 0) {
yield batch;
}
}
/**
* Synchronize Anvil pricing based on source provider pricing
*
* Updates anvil_pricing table with retail prices calculated from source pricing
* Formula: retail = cost × 1.21 (10% margin, then 10% VAT on top: 1.10 × 1.10 = 1.21)
*
* @param provider - Source provider name (linode, vultr, aws)
* @returns Number of anvil_pricing records updated
*/
private async syncAnvilPricing(provider: string): Promise<number> {
this.logger.info('Starting Anvil pricing sync', { provider });
try {
// Step 1: Find all anvil_regions sourced from this provider
const anvilRegionsResult = await this.repos.db
.prepare('SELECT id, source_region_id FROM anvil_regions WHERE source_provider = ?')
.bind(provider)
.all<{ id: number; source_region_id: number }>();
if (!anvilRegionsResult.success || anvilRegionsResult.results.length === 0) {
this.logger.info('No anvil_regions found for provider', { provider });
return 0;
}
const anvilRegions = anvilRegionsResult.results;
this.logger.info('Found anvil_regions', { provider, count: anvilRegions.length });
// Step 2: Find all anvil_pricing records with source_instance_id
const anvilPricingResult = await this.repos.db
.prepare(`
SELECT
ap.id,
ap.anvil_instance_id,
ap.anvil_region_id,
ap.source_instance_id,
ar.source_region_id
FROM anvil_pricing ap
JOIN anvil_regions ar ON ap.anvil_region_id = ar.id
WHERE ar.source_provider = ?
AND ap.source_instance_id IS NOT NULL
`)
.bind(provider)
.all<{
id: number;
anvil_instance_id: number;
anvil_region_id: number;
source_instance_id: number;
source_region_id: number;
}>();
if (!anvilPricingResult.success || anvilPricingResult.results.length === 0) {
this.logger.info('No anvil_pricing records found with source_instance_id', { provider });
return 0;
}
const anvilPricingRecords = anvilPricingResult.results;
this.logger.info('Found anvil_pricing records to update', {
provider,
count: anvilPricingRecords.length
});
// Step 3: Fetch source pricing data with paired conditions
// Batch queries to avoid SQLite limits (max 100 pairs per query)
const CHUNK_SIZE = 100;
const allSourcePricing: Array<{
instance_type_id: number;
region_id: number;
hourly_price: number;
monthly_price: number;
}> = [];
for (let i = 0; i < anvilPricingRecords.length; i += CHUNK_SIZE) {
const chunk = anvilPricingRecords.slice(i, i + CHUNK_SIZE);
if (chunk.length === 0) continue;
const conditions = chunk
.map(() => '(instance_type_id = ? AND region_id = ?)')
.join(' OR ');
const params = chunk.flatMap(r => [r.source_instance_id, r.source_region_id]);
const chunkResult = await this.repos.db
.prepare(`
SELECT
instance_type_id,
region_id,
hourly_price,
monthly_price
FROM pricing
WHERE ${conditions}
`)
.bind(...params)
.all<{
instance_type_id: number;
region_id: number;
hourly_price: number;
monthly_price: number;
}>();
if (chunkResult.success && chunkResult.results) {
allSourcePricing.push(...chunkResult.results);
}
}
if (allSourcePricing.length === 0) {
this.logger.warn('No source pricing data found', { provider });
return 0;
}
// Step 4: Build lookup map: `${instance_type_id}_${region_id}` → pricing
const sourcePricingMap = new Map<string, { hourly_price: number; monthly_price: number }>(
allSourcePricing.map(p => [
`${p.instance_type_id}_${p.region_id}`,
{ hourly_price: p.hourly_price, monthly_price: p.monthly_price }
])
);
// Step 5: Prepare update statements
const updateStatements: D1PreparedStatement[] = [];
for (const record of anvilPricingRecords) {
const lookupKey = `${record.source_instance_id}_${record.source_region_id}`;
const sourcePricing = sourcePricingMap.get(lookupKey);
if (!sourcePricing) {
this.logger.warn('Source pricing not found', {
anvil_pricing_id: record.id,
source_instance_id: record.source_instance_id,
source_region_id: record.source_region_id
});
continue;
}
// Calculate retail prices: cost × 1.21
const hourlyPrice = calculateRetailHourly(sourcePricing.hourly_price);
const monthlyPrice = calculateRetailMonthly(sourcePricing.monthly_price);
updateStatements.push(
this.repos.db.prepare(`
UPDATE anvil_pricing
SET
hourly_price = ?,
monthly_price = ?
WHERE id = ?
`).bind(
hourlyPrice,
monthlyPrice,
record.id
)
);
}
if (updateStatements.length === 0) {
this.logger.info('No anvil_pricing records to update', { provider });
return 0;
}
// Step 6: Execute batch update
const results = await this.repos.db.batch(updateStatements);
const successCount = results.reduce(
(sum, result) => sum + (result.meta?.changes ?? 0),
0
);
this.logger.info('Anvil pricing sync completed', {
provider,
updated: successCount,
total: updateStatements.length
});
return successCount;
} catch (error) {
this.logger.error('Anvil pricing sync failed', {
provider,
error: error instanceof Error ? error.message : String(error)
});
throw error;
}
}
/**
* Create connector for a specific provider
*
* @param provider - Provider name
* @param providerId - Database provider ID
* @returns Connector adapter instance for the provider
* @throws Error if provider is not supported
*/
private async createConnector(provider: string, providerId: number): Promise<SyncConnectorAdapter> {
switch (provider.toLowerCase()) {
case 'linode': {
const connector = new LinodeConnector(this.env);
// Cache instance types for pricing extraction
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
return {
authenticate: () => connector.initialize(),
getRegions: async () => {
const regions = await connector.fetchRegions();
return regions.map(r => connector.normalizeRegion(r, providerId));
},
getInstanceTypes: async () => {
const instances = await connector.fetchInstanceTypes();
cachedInstanceTypes = instances; // Cache for pricing
// Classification priority:
// 1. GPU (gpus > 0) → handled in getGpuInstances
// 2. VPU (id contains 'netint' or 'accelerated') → handled in getVpuInstances
// 3. G8 (id starts with 'g8-') → handled in getG8Instances
// 4. Default → regular instance_types
const regularInstances = instances.filter(i => {
if (i.gpus > 0) return false;
if (i.id.includes('netint') || i.id.includes('accelerated')) return false;
if (i.id.startsWith('g8-')) return false;
return true;
});
return regularInstances.map(i => connector.normalizeInstance(i, providerId));
},
getGpuInstances: async (): Promise<GpuInstanceInput[]> => {
// Use cached instances if available to avoid redundant API calls
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for GPU extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Filter and normalize GPU instances
const gpuInstances = cachedInstanceTypes.filter(i => i.gpus > 0);
return gpuInstances.map(i => connector.normalizeGpuInstance(i, providerId));
},
getG8Instances: async (): Promise<G8InstanceInput[]> => {
// Use cached instances if available to avoid redundant API calls
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for G8 extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Filter and normalize G8 instances (g8- prefix)
const g8Instances = cachedInstanceTypes.filter(i =>
i.id.startsWith('g8-') && (!i.gpus || i.gpus === 0)
);
return g8Instances.map(i => connector.normalizeG8Instance(i, providerId));
},
getVpuInstances: async (): Promise<VpuInstanceInput[]> => {
// Use cached instances if available to avoid redundant API calls
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for VPU extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Filter and normalize VPU instances (netint or accelerated)
const vpuInstances = cachedInstanceTypes.filter(i =>
(i.id.includes('netint') || i.id.includes('accelerated')) && (!i.gpus || i.gpus === 0)
);
return vpuInstances.map(i => connector.normalizeVpuInstance(i, providerId));
},
getPricing: async (
_instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
dbGpuMap?: Map<number, { instance_id: string }>,
dbG8Map?: Map<number, { instance_id: string }>,
dbVpuMap?: Map<number, { instance_id: string }>
): Promise<number> => {
/**
* Linode Pricing Extraction Strategy (Generator Pattern):
*
* Linode pricing is embedded in instance type data (price.hourly, price.monthly).
* Generate all region × instance combinations using generator pattern.
* GPU instances are separated and stored in gpu_pricing table.
*
* Expected volume: ~190 regular + ~10 GPU instances × 20 regions = ~4,000 pricing records
* Generator pattern with 500 records/batch (SYNC_BATCH_SIZE, clamped to 1-1000) minimizes memory usage
* Each batch is immediately persisted to database to avoid memory buildup
*
* Memory footprint: at most 500 records in memory at a time (~87% less than holding all 4,000)
*
* Manual Test:
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/linode
* 2. Verify regular pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
* 3. Verify GPU pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM gpu_pricing WHERE gpu_instance_id IN (SELECT id FROM gpu_instances WHERE provider_id = (SELECT id FROM providers WHERE name = 'linode'))"
* 4. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'linode') LIMIT 10"
*/
// Re-fetch instance types if not cached
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for pricing extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Create lookup map for raw instance data by instance_id (API ID)
const rawInstanceMap = new Map(
cachedInstanceTypes.map(i => [i.id, i])
);
// Use provided maps or create empty ones
const gpuMap = dbGpuMap || new Map();
const g8Map = dbG8Map || new Map();
const vpuMap = dbVpuMap || new Map();
// Separate instances by type: GPU, VPU, G8, and regular
const gpuInstanceTypeIds: number[] = [];
const g8InstanceTypeIds: number[] = [];
const vpuInstanceTypeIds: number[] = [];
const regularInstanceTypeIds: number[] = [];
// Extract GPU instance IDs from gpuMap
for (const dbId of gpuMap.keys()) {
gpuInstanceTypeIds.push(dbId);
}
// Extract G8 instance IDs from g8Map
for (const dbId of g8Map.keys()) {
g8InstanceTypeIds.push(dbId);
}
// Extract VPU instance IDs from vpuMap
for (const dbId of vpuMap.keys()) {
vpuInstanceTypeIds.push(dbId);
}
// Regular instances from dbInstanceMap
for (const dbId of dbInstanceMap.keys()) {
regularInstanceTypeIds.push(dbId);
}
// Process regular instance pricing
let regularPricingCount = 0;
if (regularInstanceTypeIds.length > 0) {
const regularGenerator = this.generateLinodePricingBatches(
regularInstanceTypeIds,
regionIds,
dbInstanceMap,
rawInstanceMap,
this.env
);
for (const batch of regularGenerator) {
const batchCount = await this.repos.pricing.upsertMany(batch);
regularPricingCount += batchCount;
}
}
// Process GPU instance pricing
let gpuPricingCount = 0;
if (gpuInstanceTypeIds.length > 0) {
const gpuGenerator = this.generateLinodeGpuPricingBatches(
gpuInstanceTypeIds,
regionIds,
gpuMap,
rawInstanceMap,
this.env
);
for (const batch of gpuGenerator) {
const batchCount = await this.repos.gpuPricing.upsertMany(batch);
gpuPricingCount += batchCount;
}
}
// Process G8 instance pricing
let g8PricingCount = 0;
if (g8InstanceTypeIds.length > 0) {
const g8Generator = this.generateLinodeG8PricingBatches(
g8InstanceTypeIds,
regionIds,
g8Map,
rawInstanceMap,
this.env
);
for (const batch of g8Generator) {
const batchCount = await this.repos.g8Pricing.upsertMany(batch);
g8PricingCount += batchCount;
}
}
// Process VPU instance pricing
let vpuPricingCount = 0;
if (vpuInstanceTypeIds.length > 0) {
const vpuGenerator = this.generateLinodeVpuPricingBatches(
vpuInstanceTypeIds,
regionIds,
vpuMap,
rawInstanceMap,
this.env
);
for (const batch of vpuGenerator) {
const batchCount = await this.repos.vpuPricing.upsertMany(batch);
vpuPricingCount += batchCount;
}
}
const totalCount = regularPricingCount + gpuPricingCount + g8PricingCount + vpuPricingCount;
this.logger.info('Generated and upserted pricing records for Linode', {
regular_pricing: regularPricingCount,
gpu_pricing: gpuPricingCount,
g8_pricing: g8PricingCount,
vpu_pricing: vpuPricingCount,
total: totalCount
});
// Return total count of processed records
return totalCount;
},
};
}
case 'vultr': {
const connector = new VultrConnector(this.env);
// Cache plans for pricing extraction
let cachedPlans: Awaited<ReturnType<typeof connector.fetchPlans>> | null = null;
return {
authenticate: () => connector.initialize(),
getRegions: async () => {
const regions = await connector.fetchRegions();
return regions.map(r => connector.normalizeRegion(r, providerId));
},
getInstanceTypes: async () => {
const plans = await connector.fetchPlans();
cachedPlans = plans; // Cache for pricing
// Filter out GPU instances (vcg type)
const regularPlans = plans.filter(p => !p.id.startsWith('vcg'));
return regularPlans.map(p => connector.normalizeInstance(p, providerId));
},
getGpuInstances: async (): Promise<GpuInstanceInput[]> => {
// Use cached plans if available to avoid redundant API calls
if (!cachedPlans) {
this.logger.info('Fetching plans for GPU extraction');
cachedPlans = await connector.fetchPlans();
}
// Filter and normalize GPU instances (vcg type)
const gpuPlans = cachedPlans.filter(p => p.id.startsWith('vcg'));
return gpuPlans.map(p => connector.normalizeGpuInstance(p, providerId));
},
getPricing: async (
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>,
dbGpuMap?: Map<number, { instance_id: string }>
): Promise<number> => {
/**
* Vultr Pricing Extraction Strategy (Generator Pattern):
*
* Vultr pricing is embedded in plan data (monthly_cost).
* Generate all region × plan combinations using generator pattern.
*
* Expected volume: ~100 regular plans × 20 regions = ~2,000 pricing records
* ~35 GPU plans × 20 regions = ~700 GPU pricing records
* Generator pattern with 500 records/batch (SYNC_BATCH_SIZE, clamped to 1-1000) minimizes memory usage
* Each batch is immediately persisted to database to avoid memory buildup
*
* Memory footprint: at most 500 records in memory at a time instead of all ~2,700
*
* Manual Test:
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/vultr
* 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
* 3. Verify GPU pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM gpu_pricing WHERE gpu_instance_id IN (SELECT id FROM gpu_instances WHERE provider_id = (SELECT id FROM providers WHERE name = 'vultr'))"
* 4. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'vultr') LIMIT 10"
* 5. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
*/
// Re-fetch plans if not cached
if (!cachedPlans) {
this.logger.info('Fetching plans for pricing extraction');
cachedPlans = await connector.fetchPlans();
}
// Create lookup map for raw plan data by plan ID (API ID)
const rawPlanMap = new Map(
cachedPlans.map(p => [p.id, p])
);
// Process regular instance pricing
let regularPricingCount = 0;
if (instanceTypeIds.length > 0) {
const regularGenerator = this.generateVultrPricingBatches(
instanceTypeIds,
regionIds,
dbInstanceMap,
rawPlanMap,
this.env
);
for (const batch of regularGenerator) {
const batchCount = await this.repos.pricing.upsertMany(batch);
regularPricingCount += batchCount;
}
}
// Process GPU instance pricing
let gpuPricingCount = 0;
const gpuMap = dbGpuMap || new Map();
if (gpuMap.size > 0) {
const gpuInstanceTypeIds = Array.from(gpuMap.keys());
const gpuGenerator = this.generateVultrGpuPricingBatches(
gpuInstanceTypeIds,
regionIds,
gpuMap,
rawPlanMap,
this.env
);
for (const batch of gpuGenerator) {
const batchCount = await this.repos.gpuPricing.upsertMany(batch);
gpuPricingCount += batchCount;
}
}
const totalCount = regularPricingCount + gpuPricingCount;
this.logger.info('Generated and upserted pricing records for Vultr', {
regular_pricing: regularPricingCount,
gpu_pricing: gpuPricingCount,
total: totalCount
});
// Return total count of processed records
return totalCount;
},
};
}
case 'aws': {
const connector = new AWSConnector(this.env);
// Cache instance types for pricing extraction
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
return {
authenticate: () => connector.initialize(),
getRegions: async () => {
const regions = await connector.fetchRegions();
return regions.map(r => connector.normalizeRegion(r, providerId));
},
getInstanceTypes: async () => {
const instances = await connector.fetchInstanceTypes();
cachedInstanceTypes = instances; // Cache for pricing
return instances.map(i => connector.normalizeInstance(i, providerId));
},
getPricing: async (
instanceTypeIds: number[],
regionIds: number[],
dbInstanceMap: Map<number, { instance_id: string }>
): Promise<number> => {
/**
* AWS Pricing Extraction Strategy (Generator Pattern):
*
* AWS pricing from ec2.shop is region-agnostic (same price globally).
* Generate all region × instance combinations using generator pattern.
*
* Expected volume: ~870 instances × 29 regions = ~25,230 pricing records
* Generator pattern with 500 records/batch minimizes memory usage
* Each batch is immediately persisted to database to avoid memory buildup
*
* Manual Test:
* 1. Run sync: curl -X POST http://localhost:8787/api/sync/aws
* 2. Verify pricing count: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE instance_type_id IN (SELECT id FROM instance_types WHERE provider_id = (SELECT id FROM providers WHERE name = 'aws'))"
* 3. Sample pricing: wrangler d1 execute cloud-instances-db --local --command "SELECT p.*, i.instance_name, r.region_code FROM pricing p JOIN instance_types i ON p.instance_type_id = i.id JOIN regions r ON p.region_id = r.id WHERE i.provider_id = (SELECT id FROM providers WHERE name = 'aws') LIMIT 10"
* 4. Verify data integrity: wrangler d1 execute cloud-instances-db --local --command "SELECT COUNT(*) FROM pricing WHERE hourly_price = 0 OR monthly_price = 0"
*/
// Re-fetch instance types if not cached
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for pricing extraction');
cachedInstanceTypes = await connector.fetchInstanceTypes();
}
// Create lookup map for raw instance data by instance_id (API ID)
const rawInstanceMap = new Map(
cachedInstanceTypes.map(i => [i.InstanceType, i])
);
// Use generator pattern for memory-efficient processing
const pricingGenerator = this.generateAWSPricingBatches(
instanceTypeIds,
regionIds,
dbInstanceMap,
rawInstanceMap
);
// Process batches incrementally
let totalCount = 0;
for (const batch of pricingGenerator) {
const batchCount = await this.repos.pricing.upsertMany(batch);
totalCount += batchCount;
}
this.logger.info('Generated and upserted pricing records for AWS', { count: totalCount });
// Return total count of processed records
return totalCount;
},
};
}
default:
throw new Error(`Unsupported provider: ${provider}`);
}
}
}