Clerk-Convex Data Synchronization Design

Overview

This document outlines the design for synchronizing user data between Clerk (authentication) and Convex (data storage) in the do.dev platform.

Architecture

graph TB
    subgraph "Authentication Layer"
        CLERK[Clerk User Management]
        WEBHOOK[Clerk Webhooks]
    end
    
    subgraph "Synchronization Layer"
        SYNC[Sync Service]
        QUEUE[Event Queue]
        MAPPER[Data Mapper]
    end
    
    subgraph "Data Layer"
        CONVEX_USERS[Convex Users Table]
        CONVEX_PROFILES[Convex Profiles Table]
        CONVEX_FILES[Convex Files Table]
    end
    
    CLERK --> WEBHOOK
    WEBHOOK --> QUEUE
    QUEUE --> SYNC
    SYNC --> MAPPER
    MAPPER --> CONVEX_USERS
    MAPPER --> CONVEX_PROFILES
    CONVEX_USERS --> CONVEX_FILES

Data Models

Clerk User Model

/**
 * Shape of a Clerk user as this design consumes it.
 *
 * NOTE(review): `EmailAddress` is not declared in this document; it is
 * presumably Clerk's email-address type — confirm the import.
 */
interface ClerkUser {
  /** Clerk's identifier for the user; stored in Convex as `users.clerkId`. */
  id: string;
  /** Email addresses attached to the account. */
  emailAddresses: EmailAddress[];
  // Name parts are nullable: Clerk users are not required to have them.
  firstName: string | null;
  lastName: string | null;
  fullName: string | null;
  username: string | null;
  /** Avatar URL. */
  imageUrl: string;
  /** Application-defined metadata; every key is optional. */
  publicMetadata: {
    appId?: string;
    custId?: string;
    userId?: string;
    roles?: string[];
    verified?: boolean;
    // presumably an epoch timestamp — TODO confirm units
    lastLoginAt?: number;
    onboardingCompleted?: boolean;
    bio?: string;
  };
  // presumably epoch milliseconds — TODO confirm against the Clerk SDK
  createdAt: number;
  updatedAt: number;
}

Convex User Model

// convex/schema.ts
// Table definitions for synced users. This is a fragment of the schema's
// table map, not a standalone module.

// One row per user, looked up either by Clerk id or by email.
users: defineTable({
  // Clerk sync fields
  clerkId: v.string(),
  email: v.string(),
  name: v.optional(v.string()),
  givenName: v.optional(v.string()),
  familyName: v.optional(v.string()),
  
  // Legacy fields (to be migrated)
  tokenIdentifier: v.optional(v.string()),
  emailVerified: v.optional(v.boolean()),
  hasGeneratedAvatar: v.optional(v.boolean()),
  image: v.optional(v.string()),
  
  // Timestamps
  createdAt: v.number(),
  updatedAt: v.number(),
})
// Index used by the sync mutations to find a user by Clerk id.
.index("by_clerk_id", ["clerkId"])
.index("by_email", ["email"]),

// Profile data kept in a separate table, linked to `users` through `userId`.
userProfiles: defineTable({
  userId: v.id("users"),
  displayName: v.optional(v.string()),
  bio: v.optional(v.string()),
  avatarUrl: v.optional(v.string()),
  // Optional reference to a document in the `_storage` table.
  avatarStorageId: v.optional(v.id("_storage")),
})
// Index used to fetch the profile row for a given user id.
.index("by_user", ["userId"]),

Synchronization Strategies

1. Initial User Creation

// convex/functions/syncUser.ts
/**
 * Provisions a Convex user, plus its profile row, for a Clerk user.
 *
 * The user is looked up through the `by_clerk_id` index first. When a row is
 * already present, its id is returned and nothing is written, so repeated
 * calls with the same `clerkId` do not create duplicates.
 *
 * `metadata.roles` is accepted by the validator but is not written to either
 * table.
 *
 * @returns The `_id` of the existing or newly inserted `users` document.
 */
export const createUserFromClerk = mutation({
  args: {
    clerkId: v.string(),
    email: v.string(),
    name: v.optional(v.string()),
    givenName: v.optional(v.string()),
    familyName: v.optional(v.string()),
    imageUrl: v.optional(v.string()),
    metadata: v.object({
      roles: v.optional(v.array(v.string())),
      verified: v.optional(v.boolean()),
    }),
  },
  handler: async (ctx, args) => {
    const { clerkId, email, name, givenName, familyName, imageUrl, metadata } = args;

    const alreadySynced = await ctx.db
      .query("users")
      .withIndex("by_clerk_id", (q) => q.eq("clerkId", clerkId))
      .first();

    // Already synced: hand back the existing id without touching the row.
    if (alreadySynced) return alreadySynced._id;

    const newUserId = await ctx.db.insert("users", {
      clerkId,
      email,
      name,
      givenName,
      familyName,
      image: imageUrl,
      // A missing verification flag is stored as an explicit `false`.
      emailVerified: Boolean(metadata.verified),
      createdAt: Date.now(),
      updatedAt: Date.now(),
    });

    // Seed the profile row from the same Clerk data.
    await ctx.db.insert("userProfiles", {
      userId: newUserId,
      displayName: name,
      avatarUrl: imageUrl,
    });

    return newUserId;
  },
});

2. User Update Synchronization

// convex/functions/syncUser.ts
/**
 * Returns a shallow copy of `fields` without the keys whose value is
 * `undefined`.
 *
 * `ctx.db.patch` removes a stored field when it is given an explicit
 * `undefined`, so patches built from optional arguments must be filtered
 * first.
 */
const omitUndefined = <T extends Record<string, unknown>>(fields: T): Partial<T> =>
  Object.fromEntries(
    Object.entries(fields).filter(([, value]) => value !== undefined),
  ) as Partial<T>;

/**
 * Applies a partial update from Clerk to the matching Convex user and profile.
 *
 * Only fields actually present in `updates` are written; omitted fields keep
 * their stored values. `updates.metadata.roles` is accepted by the validator
 * but is not persisted.
 *
 * NOTE(review): `DATA_PRIORITY` in this document marks `bio` and `avatar` as
 * Convex-owned, yet this mutation overwrites them whenever Clerk sends a
 * value — confirm which side should win.
 *
 * @returns The `_id` of the updated `users` document.
 * @throws Error("User not found") when no user has the given `clerkId`.
 */
export const updateUserFromClerk = mutation({
  args: {
    clerkId: v.string(),
    updates: v.object({
      email: v.optional(v.string()),
      name: v.optional(v.string()),
      givenName: v.optional(v.string()),
      familyName: v.optional(v.string()),
      imageUrl: v.optional(v.string()),
      metadata: v.optional(v.object({
        roles: v.optional(v.array(v.string())),
        verified: v.optional(v.boolean()),
        bio: v.optional(v.string()),
      })),
    }),
  },
  handler: async (ctx, args) => {
    const user = await ctx.db
      .query("users")
      .withIndex("by_clerk_id", (q) => q.eq("clerkId", args.clerkId))
      .first();

    if (!user) {
      throw new Error("User not found");
    }

    const { email, name, givenName, familyName, imageUrl, metadata } = args.updates;

    // Map argument names onto schema column names explicitly. Spreading
    // `args.updates` would send `imageUrl` and `metadata`, which are not
    // fields of the `users` table.
    await ctx.db.patch(user._id, {
      ...omitUndefined({
        email,
        name,
        givenName,
        familyName,
        image: imageUrl,
        emailVerified: metadata?.verified,
      }),
      updatedAt: Date.now(),
    });

    // Touch the profile only when at least one profile field was supplied.
    const profilePatch = omitUndefined({
      displayName: name,
      bio: metadata?.bio,
      avatarUrl: imageUrl,
    });

    if (Object.keys(profilePatch).length > 0) {
      const profile = await ctx.db
        .query("userProfiles")
        .withIndex("by_user", (q) => q.eq("userId", user._id))
        .first();

      if (profile) {
        await ctx.db.patch(profile._id, profilePatch);
      }
    }

    return user._id;
  },
});

3. Webhook Handler

// apps/webs/auth/app/api/webhooks/clerk/route.ts
import { Webhook } from 'svix'
import { headers } from 'next/headers'
import { WebhookEvent } from '@clerk/nextjs/server'
import { api } from '@workspace/convex/_generated/api'
import { ConvexHttpClient } from 'convex/browser'

// Module-level Convex HTTP client used by the webhook handler to run mutations.
// The `!` only silences the compiler: if NEXT_PUBLIC_CONVEX_URL is unset the
// constructor still receives `undefined` at runtime.
const convex = new ConvexHttpClient(process.env.NEXT_PUBLIC_CONVEX_URL!)

/** Payload carried by `user.created` events (and by `user.updated`, which is handled the same way). */
type ClerkUserPayload = Extract<WebhookEvent, { type: 'user.created' }>['data']

/**
 * Converts a Clerk user payload into the argument shapes the Convex sync
 * mutations validate.
 *
 * - Clerk reports absent names as `null`; the Convex validators are
 *   `v.optional(v.string())`, which accept a missing value but not `null`,
 *   so nulls are mapped to `undefined`.
 * - The primary email address is preferred; the first address is a fallback.
 * - Verification is read from `verification.status`.
 */
function toSyncFields(data: ClerkUserPayload) {
  const {
    email_addresses,
    primary_email_address_id,
    first_name,
    last_name,
    image_url,
    public_metadata,
  } = data

  const primaryEmail =
    email_addresses.find((address) => address.id === primary_email_address_id) ??
    email_addresses[0]

  const { roles, bio } = public_metadata

  return {
    email: primaryEmail?.email_address,
    name: `${first_name ?? ''} ${last_name ?? ''}`.trim() || undefined,
    givenName: first_name ?? undefined,
    familyName: last_name ?? undefined,
    imageUrl: image_url || undefined,
    // public_metadata is untyped; only trust `roles` when it is an array.
    roles: Array.isArray(roles) ? (roles as string[]) : ['user'],
    verified: primaryEmail?.verification?.status === 'verified',
    bio: typeof bio === 'string' ? bio : undefined,
  }
}

/**
 * Clerk webhook endpoint: verifies the Svix signature and mirrors user
 * create, update and delete events into Convex.
 *
 * Responds 400 when the Svix headers are missing, the signature is invalid,
 * or a created user has no email address. Responds 200 otherwise, including
 * for event types that are not handled. Errors thrown by the Convex
 * mutations propagate to the framework.
 *
 * @throws Error when `CLERK_WEBHOOK_SECRET` is not configured.
 */
export async function POST(req: Request) {
  const WEBHOOK_SECRET = process.env.CLERK_WEBHOOK_SECRET

  if (!WEBHOOK_SECRET) {
    throw new Error('Missing CLERK_WEBHOOK_SECRET')
  }

  const headerPayload = await headers()
  const svix_id = headerPayload.get("svix-id")
  const svix_timestamp = headerPayload.get("svix-timestamp")
  const svix_signature = headerPayload.get("svix-signature")

  if (!svix_id || !svix_timestamp || !svix_signature) {
    return new Response('Error occured -- no svix headers', {
      status: 400
    })
  }

  // The signature covers the exact bytes Clerk sent. Parsing and
  // re-stringifying the JSON can change them (whitespace, escapes, number
  // formatting) and make a valid request fail verification.
  const body = await req.text()

  const wh = new Webhook(WEBHOOK_SECRET)
  let evt: WebhookEvent

  try {
    evt = wh.verify(body, {
      "svix-id": svix_id,
      "svix-timestamp": svix_timestamp,
      "svix-signature": svix_signature,
    }) as WebhookEvent
  } catch (err) {
    console.error('Error verifying webhook:', err)
    return new Response('Error occured', {
      status: 400
    })
  }

  // Switch on `evt.type` directly so `evt.data` is narrowed per event type.
  switch (evt.type) {
    case 'user.created': {
      const fields = toSyncFields(evt.data)

      // createUserFromClerk requires an email; fail explicitly rather than
      // crash on a user that has none.
      if (!fields.email) {
        return new Response('User has no email address', {
          status: 400
        })
      }

      await convex.mutation(api.syncUser.createUserFromClerk, {
        clerkId: evt.data.id,
        email: fields.email,
        name: fields.name,
        givenName: fields.givenName,
        familyName: fields.familyName,
        imageUrl: fields.imageUrl,
        metadata: {
          roles: fields.roles,
          verified: fields.verified,
        },
      })
      break
    }

    case 'user.updated': {
      const fields = toSyncFields(evt.data)

      await convex.mutation(api.syncUser.updateUserFromClerk, {
        clerkId: evt.data.id,
        updates: {
          email: fields.email,
          name: fields.name,
          givenName: fields.givenName,
          familyName: fields.familyName,
          imageUrl: fields.imageUrl,
          metadata: {
            roles: fields.roles,
            verified: fields.verified,
            bio: fields.bio,
          },
        },
      })
      break
    }

    case 'user.deleted': {
      // Deletion payloads are not guaranteed to carry an id.
      const { id } = evt.data
      if (id) {
        await convex.mutation(api.syncUser.deleteUserByClerkId, {
          clerkId: id,
        })
      }
      break
    }

    default:
      // Event types this endpoint does not sync are acknowledged and ignored.
      break
  }

  return new Response('', { status: 200 })
}

Migration Strategy

Phase 1: Dual-Write (Current State)

  1. Keep existing Convex auth for backward compatibility
  2. Add Clerk ID to existing users
  3. Sync new users from Clerk to Convex
  4. Update both systems on user changes

Phase 2: Clerk Primary

  1. Make Clerk the primary auth source
  2. Use Convex for additional user data only
  3. Migrate authentication checks to Clerk
  4. Keep Convex for real-time features

Phase 3: Full Migration

  1. Remove Convex auth dependencies
  2. Clean up legacy auth fields
  3. Optimize data models
  4. Archive migration code

Data Consistency

1. Conflict Resolution

/**
 * Which system wins when Clerk and Convex disagree on a field.
 *
 * `as const` keeps each value as the literal `'clerk'` or `'convex'` (rather
 * than widening to `string`) and makes the table read-only.
 */
const DATA_PRIORITY = {
  email: 'clerk',      // Always use Clerk email
  name: 'clerk',       // Always use Clerk name
  roles: 'clerk',      // Always use Clerk roles
  bio: 'convex',       // User-edited in app
  avatar: 'convex',    // User-uploaded in app
} as const

2. Eventual Consistency

  • Use webhooks for async updates
  • Implement retry logic for failed syncs
  • Log all sync operations
  • Monitor sync lag metrics

3. Data Validation

/**
 * Validates a sync payload before it is written.
 *
 * Checks run from most basic to most specific, so a payload with no email is
 * reported as "Missing required fields" instead of reaching the format check
 * with an absent value.
 *
 * @throws Error('Missing required fields') when `clerkId` or `email` is absent.
 * @throws Error('Invalid email format') when `email` fails `isValidEmail`.
 * @throws Error('Invalid role') when any entry of `roles` fails `isValidRole`.
 */
const validateSyncData = (data: SyncData) => {
  // Required fields
  if (!data.clerkId || !data.email) {
    throw new Error('Missing required fields')
  }

  // Email format
  if (!isValidEmail(data.email)) {
    throw new Error('Invalid email format')
  }

  // Role validation (roles are optional; an absent list is valid)
  if (data.roles && !data.roles.every(isValidRole)) {
    throw new Error('Invalid role')
  }
}

Error Handling

1. Webhook Failures

  • Implement exponential backoff
  • Dead letter queue for failed events
  • Manual retry mechanism
  • Alert on repeated failures

2. Data Integrity

  • Transaction support where possible
  • Rollback mechanisms
  • Data validation at each layer
  • Audit logs for all changes

3. Recovery Procedures

/**
 * Manually re-syncs one user from Clerk to Convex and records the outcome.
 *
 * Steps: fetch the user from Clerk, push it to Convex, read it back to
 * confirm it exists, then log success. Any failure is logged with its
 * message and rethrown to the caller.
 *
 * @param clerkId Clerk identifier of the user to sync.
 * @throws Whatever the fetch, sync or verification step throws, rethrown
 *   after the failure is logged.
 */
export const manualSyncUser = async (clerkId: string): Promise<void> => {
  try {
    // Fetch from Clerk
    const clerkUser = await clerkClient.users.getUser(clerkId)

    // Sync to Convex
    await syncUserToConvex(clerkUser)

    // Verify sync: the read-back must find the user, otherwise the sync did
    // not take effect and must not be logged as a success.
    const convexUser = await getConvexUser(clerkId)
    if (!convexUser) {
      throw new Error(`Sync verification failed: no Convex user for Clerk id ${clerkId}`)
    }

    // Log success
    await logSync({
      clerkId,
      status: 'success',
      timestamp: Date.now(),
    })
  } catch (error: unknown) {
    // Caught values are `unknown`; narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error)

    // Log failure
    await logSync({
      clerkId,
      status: 'failed',
      error: message,
      timestamp: Date.now(),
    })

    throw error
  }
}

Performance Considerations

1. Caching Strategy

  • Cache Clerk user data in Convex
  • Use TTL for cache invalidation
  • Implement cache warming
  • Monitor cache hit rates

2. Batch Operations

  • Batch webhook processing
  • Bulk user migrations
  • Scheduled sync jobs
  • Rate limiting protection

3. Query Optimization

/**
 * Loads a user by Clerk id together with its profile row.
 *
 * This performs two indexed reads: `users` via `by_clerk_id`, then
 * `userProfiles` via `by_user`.
 *
 * @returns `null` when no user matches; otherwise the user document with a
 *   `profile` property, which is itself `null` when no profile row exists.
 */
export const getUserWithProfile = query({
  args: { clerkId: v.string() },
  handler: async (ctx, args) => {
    const user = await ctx.db
      .query("users")
      .withIndex("by_clerk_id", (q) => q.eq("clerkId", args.clerkId))
      .first()

    // Unknown Clerk id: nothing to attach a profile to.
    if (!user) {
      return null
    }

    const profile = await ctx.db
      .query("userProfiles")
      .withIndex("by_user", (q) => q.eq("userId", user._id))
      .first()

    return { ...user, profile }
  },
})

Monitoring and Observability

1. Key Metrics

  • Sync success rate
  • Sync latency
  • Data consistency score
  • Webhook processing time

2. Alerts

  • Failed sync threshold
  • Data inconsistency detection
  • Webhook timeout alerts
  • Rate limit warnings

3. Dashboards

  • Real-time sync status
  • User migration progress
  • Error trend analysis
  • Performance metrics

Security Considerations

1. Data Access

  • Clerk ID as primary identifier
  • Role-based access control
  • Data encryption at rest
  • Audit trail for changes

2. Webhook Security

  • Signature verification
  • IP allowlisting
  • Rate limiting
  • Request validation

3. Privacy Compliance

  • GDPR data handling
  • User consent tracking
  • Data retention policies
  • Right to deletion support

On this page