Rate Limiting Your Express API: From Basic to Production-Ready

Why Rate Limiting Matters

Every public-facing API needs rate limiting. Without it, a single bad actor — or even an enthusiastic customer — can overwhelm your server, spike your cloud costs, and degrade the experience for everyone else.

For the Inlinex booking system, rate limiting was essential to prevent OTP spam (each WhatsApp message costs money) and booking abuse.

Level 1: Basic Express Rate Limiting

The simplest approach uses the express-rate-limit package:

const rateLimit = require('express-rate-limit');

// Baseline limiter: at most 100 requests per 15-minute window.
const apiLimiter = rateLimit({
  max: 100,
  windowMs: 15 * 60 * 1000, // window length in milliseconds
  // Body returned to callers that have used up their allowance.
  message: { error: 'Too many requests, please try again later.' },
  standardHeaders: true, // send RateLimit-* headers
  legacyHeaders: false,
});

// Every route mounted under /api/ passes through the limiter first.
app.use('/api/', apiLimiter);

This works for single-server deployments. The counter is stored in memory and resets when the server restarts.

Level 2: Endpoint-Specific Limits

Different endpoints need different limits:

const MINUTE_MS = 60 * 1000;

// Authentication: 5 attempts per 15 minutes.
const authLimiter = rateLimit({
  max: 5,
  windowMs: 15 * MINUTE_MS,
  message: { error: 'Too many login attempts. Try again in 15 minutes.' },
});

// General API traffic: 30 requests per minute.
const apiLimiter = rateLimit({ max: 30, windowMs: MINUTE_MS });

// OTP sending costs money, so it gets the tightest budget: 3 per hour.
const otpLimiter = rateLimit({
  max: 3,
  windowMs: 60 * MINUTE_MS,
  message: { error: 'OTP limit reached. Try again in 1 hour.' },
});

// Mount order is significant: middleware runs in registration order.
app.use('/api/auth/', authLimiter);
app.use('/api/', apiLimiter);
app.post('/api/otp/send', otpLimiter);

Level 3: Progressive Rate Limiting

For the booking system, we implemented progressive delays. The first few requests are instant, then delays increase:

/**
 * Build an Express middleware that slows a client down before blocking it.
 *
 * Requests are counted per `req.ip` within a fixed window that starts at the
 * client's first request. Up to `delayAfter` requests pass immediately; each
 * further request is delayed by an extra `delayMs` (capped at `maxDelayMs`);
 * once `maxTotal` is exceeded the client gets a 429 until the window ends.
 *
 * Counters live in this process's memory, so they are neither shared between
 * server instances nor kept across restarts.
 *
 * @param {object} options
 * @param {number} [options.maxFree] - Requests served without delay; used as
 *   the default for `delayAfter`.
 * @param {number} options.maxTotal - Requests allowed per window before 429.
 * @param {number} options.windowMs - Window length in milliseconds.
 * @param {number} [options.delayAfter=options.maxFree] - Request count after
 *   which delays start.
 * @param {number} options.delayMs - Extra delay added per request beyond
 *   `delayAfter`, in milliseconds.
 * @param {number} [options.maxDelayMs=5000] - Upper bound for a single delay.
 * @returns {Function} Express middleware `(req, res, next)`.
 */
function progressiveRateLimit(options) {
  const {
    maxFree,
    maxTotal,
    windowMs,
    delayAfter = maxFree,
    delayMs,
    maxDelayMs = 5000,
  } = options;
  const requests = new Map();
  let lastSweep = Date.now();

  return (req, res, next) => {
    const key = req.ip;
    const now = Date.now();

    // Drop expired entries at most once per window so the map cannot grow
    // without bound as new IPs show up.
    if (now - lastSweep > windowMs) {
      for (const [ip, stale] of requests) {
        if (now - stale.firstRequest > windowMs) {
          requests.delete(ip);
        }
      }
      lastSweep = now;
    }

    // Start a fresh window for unknown clients and for expired windows.
    let entry = requests.get(key);
    if (!entry || now - entry.firstRequest > windowMs) {
      entry = { count: 0, firstRequest: now };
      requests.set(key, entry);
    }
    entry.count++;

    // Hard limit: reject and tell the client when the window ends.
    if (entry.count > maxTotal) {
      const retryAfter = Math.ceil((entry.firstRequest + windowMs - now) / 1000);
      res.set('Retry-After', String(retryAfter));
      return res.status(429).json({
        error: 'Rate limit exceeded',
        retryAfter,
      });
    }

    // Progressive delay: grows linearly with every request past delayAfter.
    if (entry.count > delayAfter) {
      const delay = (entry.count - delayAfter) * delayMs;
      return setTimeout(next, Math.min(delay, maxDelayMs));
    }

    next();
  };
}

// Booking limits per client IP and hour: 3 immediate requests, then delays
// growing by 500 ms per request, and a hard stop after 10 requests.
const bookingLimits = {
  windowMs: 60 * 60 * 1000,
  maxTotal: 10,
  maxFree: 3,
  delayAfter: 3,
  delayMs: 500,
};

app.post('/api/bookings', progressiveRateLimit(bookingLimits));

Level 4: Redis-Backed Distributed Limiting

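For multi-server deployments, in-memory counters don't work: each instance keeps its own count, so a client spread across N servers can make up to N times the intended number of requests. Use Redis as a shared store: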

const RedisStore = require('rate-limit-redis');
const Redis = require('ioredis');

// Single ioredis connection, configured through the REDIS_URL env variable.
const redisClient = new Redis(process.env.REDIS_URL);

// Store that forwards each command it issues to the ioredis client.
const redisStore = new RedisStore({
  sendCommand: (...args) => redisClient.call(...args),
});

// 100 requests per 15 minutes, counted in Redis instead of process memory.
const distributedLimiter = rateLimit({
  max: 100,
  windowMs: 15 * 60 * 1000,
  standardHeaders: true,
  store: redisStore,
});

Redis ensures rate limits are shared across all server instances.

Level 5: Token Bucket Algorithm

For API-heavy applications like ShipAnywhere, the token bucket algorithm provides smoother rate limiting:

/**
 * Token bucket: holds at most `capacity` tokens and regains `refillRate`
 * tokens per second. Each accepted request spends tokens, so clients may
 * burst up to `capacity` and are then held to the refill rate.
 */
class TokenBucket {
  /**
   * @param {number} capacity - Maximum tokens the bucket holds; it starts full.
   * @param {number} refillRate - Tokens added per second.
   */
  constructor(capacity, refillRate) {
    this.capacity = capacity;
    this.tokens = capacity;
    this.refillRate = refillRate; // tokens per second
    this.lastRefill = Date.now();
  }

  /**
   * Top the bucket up, then try to spend `tokens`.
   *
   * @param {number} [tokens=1] - Tokens this request costs.
   * @returns {boolean} `true` if the tokens were spent, `false` if the bucket
   *   did not hold enough (nothing is spent in that case).
   */
  consume(tokens = 1) {
    this.refill();

    if (this.tokens >= tokens) {
      this.tokens -= tokens;
      return true;
    }
    return false;
  }

  /**
   * Credit the tokens earned since the last refill, never exceeding
   * `capacity`, and move the refill timestamp to now.
   */
  refill() {
    const now = Date.now();
    // Date.now() is wall-clock time and can jump backwards (NTP or manual
    // adjustment); clamp so a backwards step never removes tokens.
    const elapsed = Math.max(0, (now - this.lastRefill) / 1000);
    this.tokens = Math.min(
      this.capacity,
      this.tokens + (elapsed * this.refillRate)
    );
    this.lastRefill = now;
  }
}

// One bucket per client key. The map is shared by every middleware created
// below, so a client's bucket keeps the capacity/refillRate of whichever
// middleware saw that client first.
const buckets = new Map();

/**
 * Build an Express middleware that spends one token per request from the
 * caller's bucket and answers 429 when the bucket is empty.
 *
 * Clients are keyed by `req.user.id` when present, otherwise by `req.ip`.
 *
 * @param {number} capacity - Bucket size, i.e. the largest allowed burst.
 * @param {number} refillRate - Tokens regained per second.
 * @returns {Function} Express middleware `(req, res, next)`.
 */
function tokenBucketMiddleware(capacity, refillRate) {
  // Seconds until at least one token is available again.
  const retryAfter = Math.ceil(1 / refillRate);
  // An idle bucket is full again after capacity / refillRate seconds, so
  // sweeping at that interval (at least 1 s) catches buckets worth dropping.
  const sweepIntervalMs = Math.max(1000, (capacity / refillRate) * 1000);
  let lastSweep = Date.now();

  // A full bucket behaves exactly like a newly created one, so it can be
  // removed; without this the map grows with every key ever seen.
  const evictFullBuckets = (now) => {
    for (const [key, bucket] of buckets) {
      bucket.refill();
      if (bucket.tokens >= bucket.capacity) {
        buckets.delete(key);
      }
    }
    lastSweep = now;
  };

  return (req, res, next) => {
    const now = Date.now();
    if (now - lastSweep >= sweepIntervalMs) {
      evictFullBuckets(now);
    }

    const key = req.user?.id || req.ip;

    let bucket = buckets.get(key);
    if (!bucket) {
      bucket = new TokenBucket(capacity, refillRate);
      buckets.set(key, bucket);
    }

    if (bucket.consume()) {
      next();
      return;
    }

    // Only send the header when it is a usable number of seconds
    // (a refillRate of 0 would yield Infinity).
    if (Number.isFinite(retryAfter) && retryAfter > 0) {
      res.set('Retry-After', String(retryAfter));
    }
    res.status(429).json({
      error: 'Rate limit exceeded',
      retryAfter,
    });
  };
}

// Bursts of up to 40 requests are allowed; tokens come back at 2 per second.
const apiTokenBucketLimiter = tokenBucketMiddleware(40, 2);
app.use('/api/', apiTokenBucketLimiter);

Response Headers

Always include rate limit information in responses:

// Collect the rate limit headers first, then attach them in one call.
const rateLimitHeaders = {
  'RateLimit-Limit': maxRequests,
  'RateLimit-Remaining': remaining,
  'RateLimit-Reset': resetTime,
  // Only on 429 responses
  'Retry-After': retryAfterSeconds,
};
res.set(rateLimitHeaders);

Clients can use these headers to throttle their own requests proactively.

Key Considerations

Identify users correctly — use authenticated user ID over IP when possible. Shared IPs (offices, VPNs) cause false positives.
Separate limits by tier — free users get lower limits than paid users.
Don't rate limit health checks — monitoring endpoints should always respond.
Log rate limit hits — they might indicate bugs in client code, not abuse.
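Fail open vs. closed — if Redis is down, should you allow all requests or reject all? For most apps, failing open (allowing requests) is the better default because an outage of the limiter should not become an outage of the API. Fail closed on endpoints where every request costs money or weakens security, such as OTP sending and login.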

Conclusion

Start with basic express-rate-limit for simple applications and graduate to Redis-backed or token bucket approaches as your needs grow. The most important thing is having any rate limiting at all — even basic protection prevents the most common abuse patterns.