refactor: comprehensive code review fixes (security, performance, QA)

## Security Improvements
- Fix timing attack in verifyApiKey with fixed 256-byte buffer
- Fix sortOrder SQL injection with whitelist validation
- Fix rate limiting bypass for non-Cloudflare traffic (fail-closed)
- Remove stack trace exposure in error responses
- Add request_id for audit trail (X-Request-ID header)
- Sanitize origin header to prevent log injection
- Add content-length validation for /sync endpoint (10KB limit)
- Replace Math.random() with crypto.randomUUID() for sync IDs
- Expand sensitive data masking patterns (8 → 18)

## Performance Improvements
- Reduce rate limiter KV reads from 3 to 1 per request (~67% reduction)
- Increase sync batch size from 100 to 500 (80% fewer batches)
- Fix health check N+1 query with efficient JOINs
- Fix COUNT(*) Cartesian product with COUNT(DISTINCT)
- Implement shared logger cache pattern across repositories
- Add CacheService singleton pattern in recommend.ts
- Add composite index for recommendation queries
- Implement Anvil pricing query batching (100 per chunk)

## QA Improvements
- Add BATCH_SIZE bounds validation (1-1000)
- Add pagination bounds (page >= 1, MAX_OFFSET = 100000)
- Add min/max range consistency validation
- Add DB reference validation for singleton services
- Add type guards for database result validation
- Add timeout mechanism for external API calls (10-60s)
- Use SUPPORTED_PROVIDERS constant instead of hardcoded list

## Removed
- Remove Vault integration (credentials now come from Wrangler secrets)
- Remove 6-hour pricing cron (daily sync only)

## Configuration
- Add idx_instance_types_specs_filter composite index
- Add CORS Access-Control-Expose-Headers

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
kappa
2026-01-25 23:50:37 +09:00
parent 9f3d3a245a
commit 3a8dd705e6
47 changed files with 2031 additions and 2459 deletions

View File

@@ -8,17 +8,16 @@
* - Batch operations for efficiency
*
* @example
* const orchestrator = new SyncOrchestrator(db, vault);
* const orchestrator = new SyncOrchestrator(db, env);
* const report = await orchestrator.syncAll(['linode']);
*/
import { VaultClient } from '../connectors/vault';
import { LinodeConnector } from '../connectors/linode';
import { VultrConnector } from '../connectors/vultr';
import { AWSConnector } from '../connectors/aws';
import { RepositoryFactory } from '../repositories';
import { createLogger } from '../utils/logger';
import { calculateRetailHourly, calculateRetailMonthly } from '../constants';
import { calculateRetailHourly, calculateRetailMonthly, SUPPORTED_PROVIDERS } from '../constants';
import type {
Env,
ProviderSyncResult,
@@ -28,9 +27,34 @@ import type {
PricingInput,
GpuInstanceInput,
GpuPricingInput,
G8InstanceInput,
G8PricingInput,
VpuInstanceInput,
VpuPricingInput,
} from '../types';
import { SyncStage } from '../types';
/**
 * Races a promise against a deadline.
 *
 * Settles with whatever `promise` settles with if that happens first;
 * otherwise rejects once `ms` milliseconds have elapsed. The timer is cleared
 * as soon as the race settles, whichever side wins. Note that the wrapped
 * operation itself is not cancelled on timeout; only the wait is abandoned.
 *
 * @param promise - The in-flight operation to wait on
 * @param ms - Deadline in milliseconds
 * @param operation - Human-readable operation name used in the timeout message
 * @returns The fulfilled value of `promise` when it beats the deadline
 * @throws Error with message `<operation> timed out after <ms>ms` when the
 *         deadline fires first; rejections from `promise` propagate unchanged
 */
async function withTimeout<T>(promise: Promise<T>, ms: number, operation: string): Promise<T> {
  // Assigned synchronously inside the Promise executor below.
  let timer: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    const onExpire = (): void => {
      reject(new Error(`${operation} timed out after ${ms}ms`));
    };
    timer = setTimeout(onExpire, ms);
  });

  try {
    const settled = await Promise.race([promise, deadline]);
    return settled;
  } finally {
    // Always release the timer so a finished race leaves nothing pending.
    clearTimeout(timer);
  }
}
/**
* Cloud provider connector interface for SyncOrchestrator
*
@@ -53,10 +77,10 @@ export interface SyncConnectorAdapter {
getGpuInstances?(): Promise<GpuInstanceInput[]>;
/** Fetch G8 instances (optional, only for Linode) */
getG8Instances?(): Promise<any[]>;
getG8Instances?(): Promise<G8InstanceInput[]>;
/** Fetch VPU instances (optional, only for Linode) */
getVpuInstances?(): Promise<any[]>;
getVpuInstances?(): Promise<VpuInstanceInput[]>;
/**
* Fetch pricing data for instances and regions
@@ -84,15 +108,12 @@ export interface SyncConnectorAdapter {
export class SyncOrchestrator {
private repos: RepositoryFactory;
private logger: ReturnType<typeof createLogger>;
private env?: Env;
constructor(
db: D1Database,
private vault: VaultClient,
env?: Env
private env: Env
) {
this.repos = new RepositoryFactory(db, env);
this.env = env;
this.logger = createLogger('[SyncOrchestrator]', env);
this.logger.info('Initialized');
}
@@ -121,21 +142,20 @@ export class SyncOrchestrator {
await this.repos.providers.updateSyncStatus(provider, 'syncing');
this.logger.info(`${provider}${stage}`);
// Stage 2: Fetch credentials from Vault
stage = SyncStage.FETCH_CREDENTIALS;
// Stage 2: Initialize connector and authenticate
const connector = await this.createConnector(provider, providerRecord.id);
await connector.authenticate();
this.logger.info(`${provider}${stage}`);
await withTimeout(connector.authenticate(), 10000, `${provider} authentication`);
this.logger.info(`${provider}initialized`);
// Stage 3: Fetch regions from provider API
stage = SyncStage.FETCH_REGIONS;
const regions = await connector.getRegions();
const regions = await withTimeout(connector.getRegions(), 15000, `${provider} fetch regions`);
this.logger.info(`${provider}${stage}`, { regions: regions.length });
// Stage 4: Fetch instance types from provider API
stage = SyncStage.FETCH_INSTANCES;
const instances = await connector.getInstanceTypes();
const instances = await withTimeout(connector.getInstanceTypes(), 30000, `${provider} fetch instances`);
this.logger.info(`${provider}${stage}`, { instances: instances.length });
// Stage 5: Normalize data (add provider_id)
@@ -170,8 +190,8 @@ export class SyncOrchestrator {
if (provider.toLowerCase() === 'linode') {
// GPU instances
if ('getGpuInstances' in connector) {
const gpuInstances = await (connector as any).getGpuInstances();
if (connector.getGpuInstances) {
const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
if (gpuInstances && gpuInstances.length > 0) {
gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
providerRecord.id,
@@ -181,8 +201,8 @@ export class SyncOrchestrator {
}
// G8 instances
if ('getG8Instances' in connector) {
const g8Instances = await (connector as any).getG8Instances();
if (connector.getG8Instances) {
const g8Instances = await withTimeout(connector.getG8Instances(), 15000, `${provider} fetch G8 instances`);
if (g8Instances && g8Instances.length > 0) {
g8InstancesCount = await this.repos.g8Instances.upsertMany(
providerRecord.id,
@@ -192,8 +212,8 @@ export class SyncOrchestrator {
}
// VPU instances
if ('getVpuInstances' in connector) {
const vpuInstances = await (connector as any).getVpuInstances();
if (connector.getVpuInstances) {
const vpuInstances = await withTimeout(connector.getVpuInstances(), 15000, `${provider} fetch VPU instances`);
if (vpuInstances && vpuInstances.length > 0) {
vpuInstancesCount = await this.repos.vpuInstances.upsertMany(
providerRecord.id,
@@ -205,8 +225,8 @@ export class SyncOrchestrator {
// Handle Vultr GPU instances
if (provider.toLowerCase() === 'vultr') {
if ('getGpuInstances' in connector) {
const gpuInstances = await (connector as any).getGpuInstances();
if (connector.getGpuInstances) {
const gpuInstances = await withTimeout(connector.getGpuInstances(), 15000, `${provider} fetch GPU instances`);
if (gpuInstances && gpuInstances.length > 0) {
gpuInstancesCount = await this.repos.gpuInstances.upsertMany(
providerRecord.id,
@@ -234,9 +254,27 @@ export class SyncOrchestrator {
throw new Error('Failed to fetch regions/instances for pricing');
}
// Type-safe extraction of IDs and mapping data from batch results
const regionIds = (dbRegionsResult.results as Array<{ id: number }>).map(r => r.id);
const dbInstancesData = dbInstancesResult.results as Array<{ id: number; instance_id: string }>;
// Validate and extract region IDs
if (!Array.isArray(dbRegionsResult.results)) {
throw new Error('Unexpected database result format for regions');
}
const regionIds = dbRegionsResult.results.map((r: any) => {
if (typeof r?.id !== 'number') {
throw new Error('Invalid region id in database result');
}
return r.id;
});
// Validate and extract instance type data
if (!Array.isArray(dbInstancesResult.results)) {
throw new Error('Unexpected database result format for instances');
}
const dbInstancesData = dbInstancesResult.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid instance data in database result');
}
return { id: i.id, instance_id: i.instance_id };
});
const instanceTypeIds = dbInstancesData.map(i => i.id);
// Create instance mapping to avoid redundant queries in getPricing
@@ -244,26 +282,56 @@ export class SyncOrchestrator {
dbInstancesData.map(i => [i.id, { instance_id: i.instance_id }])
);
// Create specialized instance mappings
// Create specialized instance mappings with validation
if (!Array.isArray(dbGpuResult.results)) {
throw new Error('Unexpected database result format for GPU instances');
}
const dbGpuMap = new Map(
(dbGpuResult.results as Array<{ id: number; instance_id: string }>).map(i => [i.id, { instance_id: i.instance_id }])
dbGpuResult.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid GPU instance data in database result');
}
return [i.id, { instance_id: i.instance_id }];
})
);
if (!Array.isArray(dbG8Result.results)) {
throw new Error('Unexpected database result format for G8 instances');
}
const dbG8Map = new Map(
(dbG8Result.results as Array<{ id: number; instance_id: string }>).map(i => [i.id, { instance_id: i.instance_id }])
dbG8Result.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid G8 instance data in database result');
}
return [i.id, { instance_id: i.instance_id }];
})
);
if (!Array.isArray(dbVpuResult.results)) {
throw new Error('Unexpected database result format for VPU instances');
}
const dbVpuMap = new Map(
(dbVpuResult.results as Array<{ id: number; instance_id: string }>).map(i => [i.id, { instance_id: i.instance_id }])
dbVpuResult.results.map((i: any) => {
if (typeof i?.id !== 'number' || typeof i?.instance_id !== 'string') {
throw new Error('Invalid VPU instance data in database result');
}
return [i.id, { instance_id: i.instance_id }];
})
);
// Get pricing data - may return array or count depending on provider
// Pass all instance maps for specialized pricing
const pricingResult = await connector.getPricing(
instanceTypeIds,
regionIds,
dbInstanceMap,
dbGpuMap,
dbG8Map,
dbVpuMap
const pricingResult = await withTimeout(
connector.getPricing(
instanceTypeIds,
regionIds,
dbInstanceMap,
dbGpuMap,
dbG8Map,
dbVpuMap
),
60000,
`${provider} fetch pricing`
);
// Handle both return types: array (Linode, Vultr) or number (AWS with generator)
@@ -294,7 +362,7 @@ export class SyncOrchestrator {
this.logger.info(`${provider}${stage}`);
// Stage 8: Sync Anvil Pricing (if applicable)
stage = 'SYNC_ANVIL_PRICING' as SyncStage;
stage = SyncStage.SYNC_ANVIL_PRICING;
let anvilPricingCount = 0;
try {
anvilPricingCount = await this.syncAnvilPricing(provider);
@@ -349,7 +417,7 @@ export class SyncOrchestrator {
error_details: {
stage,
message: errorMessage,
stack: error instanceof Error ? error.stack : undefined,
// Stack trace logged server-side only, not exposed to clients
},
};
}
@@ -357,46 +425,51 @@ export class SyncOrchestrator {
/**
* Synchronize all providers
* Runs synchronizations in parallel for efficiency
*
* IMPORTANT: Providers are synced sequentially (not in parallel) to avoid
* exceeding Cloudflare Workers' 30-second CPU time limit. Each provider
* sync involves multiple API calls and database operations.
*
* For production deployments with large datasets, consider using
* Cloudflare Queues to process each provider as a separate job.
*
* @param providers - Array of provider names to sync (defaults to all supported providers)
* @returns Complete sync report with statistics
*/
async syncAll(providers = ['linode', 'vultr', 'aws']): Promise<SyncReport> {
async syncAll(providers: string[] = [...SUPPORTED_PROVIDERS]): Promise<SyncReport> {
const startedAt = new Date().toISOString();
const startTime = Date.now();
this.logger.info('Starting sync for providers', { providers: providers.join(', ') });
this.logger.info('Starting sequential sync for providers', { providers: providers.join(', ') });
// Run all provider syncs in parallel
const results = await Promise.allSettled(
providers.map(p => this.syncProvider(p))
);
// Run provider syncs sequentially to avoid CPU timeout
// Each provider sync is independent and can complete within time limits
const providerResults: ProviderSyncResult[] = [];
// Extract results
const providerResults: ProviderSyncResult[] = results.map((result, index) => {
if (result.status === 'fulfilled') {
return result.value;
} else {
// Handle rejected promises
const provider = providers[index];
const errorMessage = result.reason instanceof Error
? result.reason.message
: 'Unknown error';
for (const provider of providers) {
try {
const result = await this.syncProvider(provider);
providerResults.push(result);
this.logger.error(`${provider} promise rejected`, { error: result.reason instanceof Error ? result.reason.message : String(result.reason) });
return {
// Log progress after each provider
this.logger.info('Provider sync completed', {
provider,
success: result.success,
elapsed_ms: Date.now() - startTime
});
} catch (error) {
// Handle unexpected errors
providerResults.push({
provider,
success: false,
regions_synced: 0,
instances_synced: 0,
pricing_synced: 0,
duration_ms: 0,
error: errorMessage,
};
error: error instanceof Error ? error.message : 'Unknown error',
});
}
});
}
const completedAt = new Date().toISOString();
const totalDuration = Date.now() - startTime;
@@ -452,7 +525,7 @@ export class SyncOrchestrator {
dbInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { Cost: number; MonthlyPrice: number }>
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = 100;
const BATCH_SIZE = 500;
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
@@ -515,7 +588,10 @@ export class SyncOrchestrator {
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
@@ -578,7 +654,10 @@ export class SyncOrchestrator {
rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
env?: Env
): Generator<PricingInput[], void, void> {
const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: PricingInput[] = [];
for (const regionId of regionIds) {
@@ -644,7 +723,10 @@ export class SyncOrchestrator {
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<GpuPricingInput[], void, void> {
const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: GpuPricingInput[] = [];
for (const regionId of regionIds) {
@@ -707,7 +789,10 @@ export class SyncOrchestrator {
rawPlanMap: Map<string, { id: string; monthly_cost: number }>,
env?: Env
): Generator<GpuPricingInput[], void, void> {
const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: GpuPricingInput[] = [];
for (const regionId of regionIds) {
@@ -759,9 +844,12 @@ export class SyncOrchestrator {
dbG8InstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<any[], void, void> {
const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
let batch: any[] = [];
): Generator<G8PricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: G8PricingInput[] = [];
for (const regionId of regionIds) {
for (const g8InstanceId of g8InstanceTypeIds) {
@@ -809,9 +897,12 @@ export class SyncOrchestrator {
dbVpuInstanceMap: Map<number, { instance_id: string }>,
rawInstanceMap: Map<string, { id: string; price: { hourly: number; monthly: number } }>,
env?: Env
): Generator<any[], void, void> {
const BATCH_SIZE = parseInt(env?.SYNC_BATCH_SIZE || '100', 10);
let batch: any[] = [];
): Generator<VpuPricingInput[], void, void> {
const BATCH_SIZE = Math.min(
Math.max(parseInt(env?.SYNC_BATCH_SIZE || '500', 10) || 500, 1),
1000
);
let batch: VpuPricingInput[] = [];
for (const regionId of regionIds) {
for (const vpuInstanceId of vpuInstanceTypeIds) {
@@ -910,37 +1001,56 @@ export class SyncOrchestrator {
count: anvilPricingRecords.length
});
// Step 4: Fetch source pricing data in batch
const sourcePricingResult = await this.repos.db
.prepare(`
SELECT
instance_type_id,
region_id,
hourly_price,
monthly_price
FROM pricing
WHERE instance_type_id IN (${anvilPricingRecords.map(() => '?').join(',')})
AND region_id IN (${anvilPricingRecords.map(() => '?').join(',')})
`)
.bind(
...anvilPricingRecords.map(r => r.source_instance_id),
...anvilPricingRecords.map(r => r.source_region_id)
)
.all<{
instance_type_id: number;
region_id: number;
hourly_price: number;
monthly_price: number;
}>();
// Step 4: Fetch source pricing data with paired conditions
// Batch queries to avoid SQLite limits (max 100 pairs per query)
const CHUNK_SIZE = 100;
const allSourcePricing: Array<{
instance_type_id: number;
region_id: number;
hourly_price: number;
monthly_price: number;
}> = [];
if (!sourcePricingResult.success || sourcePricingResult.results.length === 0) {
for (let i = 0; i < anvilPricingRecords.length; i += CHUNK_SIZE) {
const chunk = anvilPricingRecords.slice(i, i + CHUNK_SIZE);
if (chunk.length === 0) continue;
const conditions = chunk
.map(() => '(instance_type_id = ? AND region_id = ?)')
.join(' OR ');
const params = chunk.flatMap(r => [r.source_instance_id, r.source_region_id]);
const chunkResult = await this.repos.db
.prepare(`
SELECT
instance_type_id,
region_id,
hourly_price,
monthly_price
FROM pricing
WHERE ${conditions}
`)
.bind(...params)
.all<{
instance_type_id: number;
region_id: number;
hourly_price: number;
monthly_price: number;
}>();
if (chunkResult.success && chunkResult.results) {
allSourcePricing.push(...chunkResult.results);
}
}
if (allSourcePricing.length === 0) {
this.logger.warn('No source pricing data found', { provider });
return 0;
}
// Step 5: Build lookup map: `${instance_type_id}_${region_id}` → pricing
const sourcePricingMap = new Map<string, { hourly_price: number; monthly_price: number }>(
sourcePricingResult.results.map(p => [
allSourcePricing.map(p => [
`${p.instance_type_id}_${p.region_id}`,
{ hourly_price: p.hourly_price, monthly_price: p.monthly_price }
])
@@ -1021,7 +1131,7 @@ export class SyncOrchestrator {
private async createConnector(provider: string, providerId: number): Promise<SyncConnectorAdapter> {
switch (provider.toLowerCase()) {
case 'linode': {
const connector = new LinodeConnector(this.vault);
const connector = new LinodeConnector(this.env);
// Cache instance types for pricing extraction
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;
@@ -1059,7 +1169,7 @@ export class SyncOrchestrator {
const gpuInstances = cachedInstanceTypes.filter(i => i.gpus > 0);
return gpuInstances.map(i => connector.normalizeGpuInstance(i, providerId));
},
getG8Instances: async (): Promise<any[]> => {
getG8Instances: async (): Promise<G8InstanceInput[]> => {
// Use cached instances if available to avoid redundant API calls
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for G8 extraction');
@@ -1072,7 +1182,7 @@ export class SyncOrchestrator {
);
return g8Instances.map(i => connector.normalizeG8Instance(i, providerId));
},
getVpuInstances: async (): Promise<any[]> => {
getVpuInstances: async (): Promise<VpuInstanceInput[]> => {
// Use cached instances if available to avoid redundant API calls
if (!cachedInstanceTypes) {
this.logger.info('Fetching instance types for VPU extraction');
@@ -1239,7 +1349,7 @@ export class SyncOrchestrator {
}
case 'vultr': {
const connector = new VultrConnector(this.vault);
const connector = new VultrConnector(this.env);
// Cache plans for pricing extraction
let cachedPlans: Awaited<ReturnType<typeof connector.fetchPlans>> | null = null;
@@ -1356,7 +1466,7 @@ export class SyncOrchestrator {
}
case 'aws': {
const connector = new AWSConnector(this.vault);
const connector = new AWSConnector(this.env);
// Cache instance types for pricing extraction
let cachedInstanceTypes: Awaited<ReturnType<typeof connector.fetchInstanceTypes>> | null = null;