1. React Native
  2. On-Device AI Scanning (OCR)

React Native

On-Device AI Scanning (OCR)

The Vision SDK allows for on-device AI scanning (OCR), enabling offline extraction of structured information from documents such as shipping labels. This is ideal when low latency and offline functionality are critical, such as in warehouse and logistics environments.


🛠️ Step 1: Preparing On-Device OCR

Before using the on-device OCR, you must prepare the model using the imperative method configureOnDeviceModel and set the ocrMode prop to 'on_device'. This ensures the necessary AI models are downloaded and ready to use.

Model Size and Type Configuration

If you want Vision SDK to automatically determine the best model size based on your PackageX subscription:

        import VisionSdkView, { VisionSdkRefProps } from 'react-native-vision-sdk'
import { useRef, useEffect } from 'react'

const Example = () => {
    // Ref exposes the SDK's imperative methods (startRunningHandler, configureOnDeviceModel, ...).
    const vsdkRef = useRef<VisionSdkRefProps>(null);

    useEffect(() => {
        // Defer to the next tick so the native view has mounted before we drive it.
        const timeout = setTimeout(() => {
            // Optional chaining: the ref is null until the view is attached.
            vsdkRef.current?.startRunningHandler() // this method starts the camera feed
            vsdkRef.current?.configureOnDeviceModel({
                type: 'shipping_label', // can be one of ['shipping_label', 'item_label', 'bill_of_lading', 'document_classification']
                size: 'large' // optional parameter; exclude it to let the SDK decide the appropriate size. Can be one of ['large', 'micro']
            })
        }, 0)

        return () => {
            clearTimeout(timeout)
        }
    }, [])

    return (
        <View style={{flex: 1}}>
            <VisionSdkView
                ref={vsdkRef}
                mode='ocr'
                ocrMode='on_device'
                ocrType='shipping_label'
            />
        </View>
    )
}


      
NOTE

Please note that you must wait until the model has been successfully downloaded and configured before scanning. This is a one-time operation only. More on this in the next section.


⚙️ Android Model Execution Provider Configuration

For Android devices, you can configure the OCR model execution provider to optimize performance based on device capabilities using the modelExecutionProviderAndroid prop.

Available Providers

  • CPU (Default): Uses CPU for model execution - most compatible but slower
  • NNAPI: Uses Android Neural Networks API for hardware acceleration
  • XNNPACK: Uses XNNPACK backend for optimized CPU execution
        import VisionSdkView, { VisionSdkRefProps } from 'react-native-vision-sdk'
import { useRef, useEffect } from 'react'

const Example = () => {
    // Ref exposes the SDK's imperative methods.
    const vsdkRef = useRef<VisionSdkRefProps>(null);

    useEffect(() => {
        // Defer to the next tick so the native view has mounted before we drive it.
        const timeout = setTimeout(() => {
            // Optional chaining: the ref is null until the view is attached.
            vsdkRef.current?.startRunningHandler()
            vsdkRef.current?.configureOnDeviceModel({
                type: 'shipping_label',
                size: 'large'
            })
        }, 0)

        return () => {
            clearTimeout(timeout)
        }
    }, [])

    return (
        <View style={{flex: 1}}>
            <VisionSdkView
                ref={vsdkRef}
                mode='ocr'
                ocrMode='on_device'
                ocrType='shipping_label'
                modelExecutionProviderAndroid='NNAPI' // Configure execution provider: 'CPU' | 'NNAPI' | 'XNNPACK'
            />
        </View>
    )
}

      

Provider Selection Guidelines

  • Use CPU for maximum compatibility across all Android devices
  • Use NNAPI for newer Android devices (API level 27+) with neural processing units
  • Use XNNPACK for optimized CPU performance on devices without dedicated AI hardware
NOTE

The modelExecutionProviderAndroid prop only affects Android devices. iOS devices automatically use the most appropriate execution method.

🧩 Model download progress events

You need to set the onModelDownloadProgress event handler to receive download progress events.

        const Example = () => {
    // Ref exposes the SDK's imperative methods.
    const vsdkRef = useRef<VisionSdkRefProps>(null);

    useEffect(() => {
        // Defer to the next tick so the native view has mounted before we drive it.
        const timeout = setTimeout(() => {
            // Optional chaining: the ref is null until the view is attached.
            vsdkRef.current?.startRunningHandler() // this method starts the camera feed
            vsdkRef.current?.configureOnDeviceModel({
                type: 'shipping_label', // can be one of ['shipping_label', 'item_label', 'bill_of_lading', 'document_classification']
                size: 'large' // optional parameter; exclude it to let the SDK decide the appropriate size. Can be one of ['large', 'micro']
            })
        }, 0)

        return () => {
            clearTimeout(timeout)
        }
    }, [])

    const handleDownloadProgress = (data) => {
        /*
        The data argument has the following structure:
        {
            progress: number,  //download percentage (0-100)
            downloadStatus: boolean, //whether the model is downloaded
            isReady: boolean // whether the model is ready for use (recommended check)
        }
        */

        // Use isReady instead of downloadStatus for better reliability.
        // NOTE: this check must live inside the handler — `data` is only
        // in scope here, not in the component body.
        if (data.isReady) {
            console.log('Model is ready for use!');
            // You can now safely capture images for OCR processing
        } else if (data.progress < 100) {
            console.log(`Download progress: ${data.progress}%`);
            // Show loading indicator to user
        }
    }

    return (
        <View style={{flex: 1}}>
            <VisionSdkView
                ref={vsdkRef}
                mode='ocr'
                ocrMode='on_device'
                ocrType='shipping_label'
                onModelDownloadProgress={handleDownloadProgress}
            />
        </View>
    )
}

      

📝 Make sure the model is prepared successfully before scanning. Each model may vary in size and complexity depending on the document type.

🧠 Step 2: Extracting Data from Image

Once the model is prepared successfully, make sure you have the ocrMode and ocrType props set appropriately. After that you can call the imperative method cameraCaptureHandler to capture the image; the response will be available in the onOCRScan event handler.

        const Example = () => {
    // Ref exposes the SDK's imperative methods.
    const vsdkRef = useRef<VisionSdkRefProps>(null);

    useEffect(() => {
        // Defer to the next tick so the native view has mounted before we drive it.
        const timeout = setTimeout(() => {
            // Optional chaining (consistent with handleCapture below): the ref
            // is null until the view is attached.
            vsdkRef.current?.startRunningHandler() // this method starts the camera feed
            vsdkRef.current?.configureOnDeviceModel({
                type: 'shipping_label', // can be one of ['shipping_label', 'item_label', 'bill_of_lading', 'document_classification']
                size: 'large' // optional parameter; exclude it to let the SDK decide the appropriate size. Can be one of ['large', 'micro']
            })
        }, 0)

        return () => {
            clearTimeout(timeout)
        }
    }, [])

    const handleDownloadProgress = (data) => {
        /*
        The data argument has the following structure:
        {
            progress: number,  //download percentage
            downloadStatus: boolean, //whether the model is downloaded,
            isReady: boolean // whether the model is ready for use
        }
        */
    }

    // Trigger a capture; the result arrives asynchronously via onOCRScan.
    const handleCapture = () => {
        vsdkRef?.current?.cameraCaptureHandler()
    }

    const handleOCRScan = (event) => {
        //event.data contains the extracted information from the document
    }

    return (
        <View style={{flex: 1}}>
            <VisionSdkView
                ref={vsdkRef}
                mode='ocr'
                ocrMode='on_device'
                ocrType='shipping_label'
                onModelDownloadProgress={handleDownloadProgress}
                onOCRScan={handleOCRScan}
            />

            <TouchableOpacity onPress={handleCapture}>
                <Text>Capture</Text>
            </TouchableOpacity>
        </View>
    )
}

      

🔁 The returned data follows the same structure as the PackageX Cloud OCR API response.