Skip to content

Performance Tuning Guide

Overview

This guide covers performance optimization techniques for NetIntel-OCR, including model optimization, processing parallelization, caching strategies, and resource management.

Performance Architecture

```mermaid
graph TB
    A[Input Documents] --> B[Queue Manager]
    B --> C[Parallel Workers]
    C --> D[Model Pool]
    C --> E[Cache Layer]
    D --> F[GPU/CPU Resources]
    E --> G[Results]
    F --> G
```

Benchmarking

Performance Testing

# Run performance benchmark
netintel-ocr system benchmark

# Benchmark specific operation
netintel-ocr system benchmark --operation processing
netintel-ocr system benchmark --operation vector-search
netintel-ocr system benchmark --operation kg-query

# Custom benchmark
netintel-ocr system benchmark --custom \
  --documents 100 \
  --parallel 4 \
  --duration 300

Performance Baseline

# Establish baseline
netintel-ocr system baseline create

# Compare with baseline
netintel-ocr system baseline compare

# Export performance report
netintel-ocr system baseline report --output perf-report.html

Model Optimization

Model Selection

# List models by performance
netintel-ocr model list --sort-by speed

# Compare model performance
netintel-ocr model compare \
  --models "qwen2.5vl:7b,llava:13b,minicpm-v:latest" \
  --metric speed

# Recommend optimal model
netintel-ocr model recommend \
  --priority speed \
  --accuracy-threshold 0.8

Model Performance Profiles

| Model | Speed | Accuracy | Memory | Use Case |
|-------|-------|----------|--------|----------|
| minicpm-v:latest | Fast | Good | 2GB | Quick processing |
| qwen2.5vl:7b | Balanced | Excellent | 7GB | Production |
| llava:13b | Slow | Best | 13GB | High accuracy |
| bakllava:latest | Very Fast | Fair | 1GB | Batch processing |

Model Configuration

# Use fast models for batch
netintel-ocr config set models.batch "bakllava:latest"

# Use accurate models for critical docs
netintel-ocr config set models.critical "llava:13b"

# Configure model-specific settings
netintel-ocr model config qwen2.5vl:7b \
  --max-context 2048 \
  --temperature 0.1 \
  --num-predict 1024

Model Preloading

# Preload frequently used models
netintel-ocr model preload qwen2.5vl:7b
netintel-ocr model preload minicpm-v:latest

# Keep models in memory
netintel-ocr model keep-loaded --models "qwen2.5vl:7b,minicpm-v:latest"

# Configure model pool
netintel-ocr config set performance.model_pool.size 3
netintel-ocr config set performance.model_pool.ttl 3600

Processing Optimization

Parallel Processing

# Optimize parallel workers
netintel-ocr config set processing.max_parallel $(nproc)

# Configure by document type
netintel-ocr config set processing.parallel.text 8
netintel-ocr config set processing.parallel.network 4
netintel-ocr config set processing.parallel.flow 4

# Dynamic scaling
netintel-ocr config set processing.scaling.enabled true
netintel-ocr config set processing.scaling.min_workers 2
netintel-ocr config set processing.scaling.max_workers 16

Batch Processing Optimization

# Optimize batch size
netintel-ocr config set processing.batch_size 50

# Enable streaming
netintel-ocr config set processing.streaming.enabled true

# Configure pipeline
netintel-ocr config set processing.pipeline.stages 3
netintel-ocr config set processing.pipeline.buffer_size 100

Processing Strategies

# Fast mode - speed priority
netintel-ocr process file document.pdf --fast-mode

# Balanced mode - default
netintel-ocr process file document.pdf

# Accurate mode - quality priority
netintel-ocr process file document.pdf --accurate-mode

# Custom strategy
netintel-ocr process file document.pdf \
  --strategy custom \
  --model minicpm-v:latest \
  --parallel 8 \
  --no-context \
  --skip-validation

GPU Acceleration

GPU Configuration

# Enable GPU
netintel-ocr config set performance.gpu.enabled true

# Select GPU device
netintel-ocr config set performance.gpu.device 0

# Configure GPU memory
netintel-ocr config set performance.gpu.memory_fraction 0.8

# Multi-GPU setup
netintel-ocr config set performance.gpu.devices "0,1"
netintel-ocr config set performance.gpu.strategy "data_parallel"

GPU Optimization

# Check GPU utilization
netintel-ocr system gpu status

# Optimize GPU batch size
netintel-ocr system gpu optimize --auto

# Monitor GPU performance
netintel-ocr system gpu monitor --interval 1

CUDA Settings

# Set CUDA environment
export CUDA_VISIBLE_DEVICES=0,1
export CUDA_LAUNCH_BLOCKING=0
export CUDA_CACHE_PATH=/tmp/cuda_cache

# Configure CUDA in NetIntel
netintel-ocr config set performance.cuda.enabled true
netintel-ocr config set performance.cuda.allow_growth true
netintel-ocr config set performance.cuda.per_process_memory 4096

Caching Strategies

Cache Configuration

# Enable caching
netintel-ocr config set cache.enabled true
netintel-ocr config set cache.dir ~/.cache/netintel-ocr
netintel-ocr config set cache.size 10GB

# Configure cache layers
netintel-ocr config set cache.layers.memory.enabled true
netintel-ocr config set cache.layers.memory.size 1GB
netintel-ocr config set cache.layers.disk.enabled true
netintel-ocr config set cache.layers.disk.size 10GB

Cache Types

# Model cache
netintel-ocr config set cache.models.enabled true
netintel-ocr config set cache.models.ttl 86400

# Results cache
netintel-ocr config set cache.results.enabled true
netintel-ocr config set cache.results.ttl 3600

# Embedding cache
netintel-ocr config set cache.embeddings.enabled true
netintel-ocr config set cache.embeddings.ttl 604800

Cache Management

# View cache statistics
netintel-ocr cache stats

# Clear specific cache
netintel-ocr cache clear --type models
netintel-ocr cache clear --type results

# Warm up cache
netintel-ocr cache warmup --models "qwen2.5vl:7b,minicpm-v:latest"

# Export/import cache
netintel-ocr cache export --output cache-backup.tar.gz
netintel-ocr cache import --input cache-backup.tar.gz

Memory Management

Memory Configuration

# Set memory limits
netintel-ocr config set performance.memory.max_usage 8GB
netintel-ocr config set performance.memory.per_worker 2GB

# Enable memory optimization
netintel-ocr config set performance.memory.optimize true
netintel-ocr config set performance.memory.gc_threshold 0.8

# Configure swap
netintel-ocr config set performance.memory.swap.enabled true
netintel-ocr config set performance.memory.swap.path /tmp/netintel-swap
netintel-ocr config set performance.memory.swap.size 16GB

Memory Profiling

# Monitor memory usage
netintel-ocr system memory monitor

# Find memory leaks
netintel-ocr system memory leaks --duration 300

# Optimize memory allocation
netintel-ocr system memory optimize

Database Optimization

Vector Store Optimization

# Optimize Milvus
netintel-ocr db optimize --type milvus

# Configure index
netintel-ocr config set db.milvus.index_type IVF_SQ8
netintel-ocr config set db.milvus.nlist 1024
netintel-ocr config set db.milvus.nprobe 16

# Build optimized index
netintel-ocr db rebuild-index --optimize

Query Optimization

# Analyze query performance
netintel-ocr db analyze-queries

# Optimize slow queries
netintel-ocr db optimize-queries --threshold 1000ms

# Configure query cache
netintel-ocr config set db.query_cache.enabled true
netintel-ocr config set db.query_cache.size 100MB

Network Optimization

API Performance

# Configure connection pooling
netintel-ocr config set server.api.connection_pool.size 100
netintel-ocr config set server.api.connection_pool.timeout 30

# Enable compression
netintel-ocr config set server.api.compression.enabled true
netintel-ocr config set server.api.compression.level 6

# Configure keep-alive
netintel-ocr config set server.api.keep_alive.enabled true
netintel-ocr config set server.api.keep_alive.timeout 120

Request Optimization

# Enable request batching
netintel-ocr config set server.api.batching.enabled true
netintel-ocr config set server.api.batching.max_size 10
netintel-ocr config set server.api.batching.timeout 100ms

# Configure rate limiting
netintel-ocr config set server.api.rate_limit.enabled true
netintel-ocr config set server.api.rate_limit.requests_per_second 100

Performance Profiles

Create Performance Profiles

# High throughput profile
netintel-ocr config profile create high-throughput
netintel-ocr config set processing.max_parallel 16 --profile high-throughput
netintel-ocr config set models.default minicpm-v:latest --profile high-throughput
netintel-ocr config set cache.aggressive true --profile high-throughput

# Low latency profile
netintel-ocr config profile create low-latency
netintel-ocr config set processing.max_parallel 4 --profile low-latency
netintel-ocr config set models.preload all --profile low-latency
netintel-ocr config set cache.layers.memory.size 4GB --profile low-latency

# Resource constrained profile
netintel-ocr config profile create resource-constrained
netintel-ocr config set processing.max_parallel 2 --profile resource-constrained
netintel-ocr config set performance.memory.max_usage 2GB --profile resource-constrained
netintel-ocr config set models.default bakllava:latest --profile resource-constrained

Apply Performance Profile

# Switch to performance profile
netintel-ocr config profile use high-throughput

# Apply temporarily
netintel-ocr --profile low-latency process file document.pdf

# Compare profiles
netintel-ocr config profile compare default high-throughput

Performance Monitoring

Real-time Monitoring

# Monitor performance metrics
netintel-ocr monitor performance --real-time

# Watch specific metrics
netintel-ocr monitor performance \
  --metrics "throughput,latency,cpu,memory" \
  --interval 1

# Export performance data
netintel-ocr monitor performance --export perf-data.csv

Performance Alerts

# Set performance thresholds
netintel-ocr monitor alerts add \
  --name slow-processing \
  --metric processing_time \
  --threshold 60s \
  --severity warning

netintel-ocr monitor alerts add \
  --name high-memory \
  --metric memory_usage \
  --threshold 90% \
  --severity critical

Optimization Strategies

Document Type Optimization

# Text-heavy documents
netintel-ocr process file text-doc.pdf \
  --model Nanonets-OCR-s:latest \
  --text-only \
  --parallel 8

# Diagram-heavy documents
netintel-ocr process file network-doc.pdf \
  --model qwen2.5vl:7b \
  --network-only \
  --parallel 4

# Mixed documents
netintel-ocr process file mixed-doc.pdf \
  --model qwen2.5vl:7b \
  --adaptive-processing

Batch Processing Optimization

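# Sort by size (largest first) for optimal batching
# Note: piping ls into xargs mis-splits filenames that contain whitespace or quotes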
ls -S *.pdf | xargs netintel-ocr process batch

# Process similar documents together
netintel-ocr process batch \
  --group-by similarity \
  --batch-size 20

# Adaptive batch sizing
netintel-ocr process batch \
  --adaptive-batch \
  --min-batch 5 \
  --max-batch 50

Troubleshooting Performance

Identify Bottlenecks

# Run performance diagnostic
netintel-ocr system diagnose --performance

# Profile specific operation
netintel-ocr system profile \
  --operation "process file test.pdf" \
  --detailed

# Analyze bottlenecks
netintel-ocr system analyze-bottlenecks

Common Performance Issues

Slow Processing

# Check model performance
netintel-ocr model benchmark --current

# Optimize settings
netintel-ocr system optimize --auto

# Use faster model
netintel-ocr config set models.default minicpm-v:latest

High Memory Usage

# Reduce parallel workers
netintel-ocr config set processing.max_parallel 2

# Clear caches
netintel-ocr cache clear --all

# Enable memory limits
netintel-ocr config set performance.memory.max_usage 4GB

GPU Underutilization

# Check GPU status
nvidia-smi
netintel-ocr system gpu status

# Increase batch size
netintel-ocr config set processing.gpu_batch_size 8

# Enable GPU optimization
netintel-ocr system gpu optimize

Best Practices

1. Profile Before Optimizing

# Always benchmark first
netintel-ocr system benchmark --baseline

# Make changes
netintel-ocr config set ...

# Compare results
netintel-ocr system benchmark --compare baseline

2. Start with Quick Wins

  • Enable caching
  • Preload models
  • Use appropriate parallelization
  • Choose the right model for the task

3. Monitor Continuously

# Set up monitoring
netintel-ocr monitor performance --daemon

# Review daily reports
netintel-ocr monitor report --daily

4. Optimize for Your Workload

# Analyze workload patterns
netintel-ocr system analyze-workload --duration 7d

# Get optimization recommendations
netintel-ocr system recommend --based-on-workload

Next Steps