Skip to content

Data processing related API

Workflow

Create a workflow

POST /byoa/api/v1/index_workflow

Input parameters:

Parameters Required Meaning
name Yes Workflow Name
source_volume_names Yes Source volume names (list)
source_volume_ids Yes Source volume ids (list)
target_volume_name Yes Target volume name
target_volume_id Yes Target volume id
create_target_volume_name No Create new target volume name
process_mode Yes Processing mode. interval: 0: one-time processing; 1: 5 minutes; 2: 10 minutes; 3: 30 minutes; 4: 1 hour; 5: 2 hours; 6: 4 hours; 7: 6 hours; 8: 8 hours; 9: one day
file_types Yes File type, currently only supports 2, pdf type
workflow Yes Workflow definition. split_length: the maximum segment length (minimum 100, maximum 2000). Text preprocessing rules: remove_empty_lines: true removes empty lines; remove_extra_whitespaces: true collapses consecutive spaces, newlines and tabs; remove_regex: a regular expression whose matches are deleted (the pattern in the example below removes all URLs and email addresses).

Example:

import requests
import json

# Endpoint for creating an index workflow (POST).
url = (
    "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech"
    "/byoa/api/v1/index_workflow"
)

# Request headers. The values containing "xxxx" are redacted sample values;
# substitute the ones for your own account.
headers = {
    "user-id": "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
    "Access-Token": "xxxx",
    "uid": (
        "dea010be-1a50-413a-aa7e-e0611a491cab"
        "-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
    ),
}

# Request payload for creating a workflow.
#
# Booleans and nulls are written as native Python True/False/None so that
# they are serialised as JSON true/false/null. Quoted "true"/"false"/"null"
# would be sent as plain strings (and "false" is a truthy string), which does
# not match the typed values the workflow-detail endpoint returns for these
# same fields.
body = {
    "name": "wf-3",
    "source_volume_names": ["b-vol1"],
    "source_volume_ids": ["1889223879880048640"],
    "target_volume_name": "a-vol1",
    "target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
    "create_target_volume_name": "",
    # interval 0 = one-time processing (see the parameter table above).
    "process_mode": {"interval": 0, "offset": 0},
    # 2 = pdf, the only file type listed as supported.
    "file_types": [2],
    "workflow": {
        "components": [
            {
                "name": "DocumentCleaner",
                "type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                "component_id": "DocumentCleaner_1739377283742",
                "intro": "DocumentCleaner",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "ascii_only": False,
                    "keep_id": False,
                    "remove_empty_lines": True,
                    "remove_extra_whitespaces": True,
                    # Matches email addresses and http(s) URLs.
                    "remove_regex": "([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
                    "remove_repeated_substrings": False,
                    "remove_substrings": None,
                    "unicode_normalization": None,
                },
            },
            {
                "name": "DocumentCleaner-ImageCaption",
                "type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                "component_id": "DocumentCleaner-ImageCaption_1739377283742",
                "intro": "DocumentCleaner-ImageCaption",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "ascii_only": False,
                    "keep_id": False,
                    "remove_empty_lines": True,
                    "remove_extra_whitespaces": True,
                    "remove_regex": "([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
                    "remove_repeated_substrings": False,
                    "remove_substrings": None,
                    "unicode_normalization": None,
                },
            },
            {
                "name": "DocumentCleaner-ImageOCR",
                "type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                "component_id": "DocumentCleaner-ImageOCR_1739377283742",
                "intro": "DocumentCleaner-ImageOCR",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "ascii_only": False,
                    "keep_id": False,
                    "remove_empty_lines": True,
                    "remove_extra_whitespaces": True,
                    "remove_regex": "([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
                    "remove_repeated_substrings": False,
                    "remove_substrings": None,
                    "unicode_normalization": None,
                },
            },
            {
                "name": "DocumentEmbedder",
                "type": "haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder",
                "component_id": "DocumentEmbedder_1739377283742",
                "intro": "DocumentEmbedder",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "api_base_url": "https://api.siliconflow.cn/v1",
                    "api_key": {
                        "env_vars": ["OPENAI_API_KEY"],
                        "strict": True,
                        "type": "env_var",
                    },
                    "batch_size": 32,
                    "dimensions": None,
                    "embedding_separator": "\n",
                    "meta_fields_to_embed": [],
                    "model": "BAAI/bge-m3",
                    "organization": None,
                    "prefix": "",
                    "progress_bar": True,
                    "suffix": "",
                },
            },
            {
                "name": "DocumentJoiner",
                "type": "haystack.components.joiners.document_joiner.DocumentJoiner",
                "component_id": "DocumentJoiner_1739377283742",
                "intro": "DocumentJoiner",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "join_mode": "concatenate",
                    "sort_by_score": True,
                    "top_k": None,
                    "weights": None,
                },
            },
            {
                "name": "DocumentJoiner-Result",
                "type": "haystack.components.joiners.document_joiner.DocumentJoiner",
                "component_id": "DocumentJoiner-Result_1739377283742",
                "intro": "DocumentJoiner-Result",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "join_mode": "concatenate",
                    "sort_by_score": True,
                    "top_k": None,
                    "weights": None,
                },
            },
            {
                "name": "DocumentSplitter",
                "type": "haystack.components.preprocessors.document_splitter.DocumentSplitter",
                "component_id": "DocumentSplitter_1739377283742",
                "intro": "DocumentSplitter",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "split_by": "word",
                    # Documented range for split_length is 100-2000.
                    "split_length": 800,
                    "split_overlap": 200,
                    "split_threshold": 0,
                },
            },
            {
                "name": "DocumentWriter",
                "type": "haystack.components.writers.document_writer.DocumentWriter",
                "component_id": "DocumentWriter_1739377283742",
                "intro": "DocumentWriter",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "document_store": {
                        "init_parameters": {
                            "connection_string": {
                                "env_vars": ["DATABASE_SYNC_URI"],
                                "strict": True,
                                "type": "env_var",
                            },
                            "embedding_dimension": 1024,
                            "keyword_index_name": "haystack_keyword_index",
                            "recreate_table": True,
                            "table_name": "embedding_results",
                            "vector_function": "cosine_similarity",
                        },
                        "type": "byoa.integrations.document_stores.mo_document_store.MOIDocumentStore",
                    },
                    "policy": "NONE",
                },
            },
            {
                "name": "FileRouterComponent",
                "type": "haystack.components.routers.file_type_router.FileTypeRouter",
                "component_id": "FileRouterComponent_1739377283742",
                "intro": "FileRouterComponent",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "additional_mimetypes": None,
                    "mime_types": [
                        "text/plain",
                        "text/markdown",
                        "image/.*",
                        "application/pdf",
                    ],
                },
            },
            {
                "name": "ImageCaptionToDocument",
                "type": "byoa.integrations.components.converters.image_caption_to_document.ImageCaptionToDocument",
                "component_id": "ImageCaptionToDocument_1739377283742",
                "intro": "ImageCaptionToDocument",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {},
            },
            {
                "name": "ImageOCRToDocument",
                "type": "byoa.integrations.components.converters.image_ocr_to_document.ImageOCRToDocument",
                "component_id": "ImageOCRToDocument_1739377283742",
                "intro": "ImageOCRToDocument",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "model": "ucaslcl/GOT-OCR2_0",
                    "tokenizer": "stepfun-ai/GOT-OCR2_0",
                },
            },
            {
                "name": "ImageToDocument",
                "type": "byoa.integrations.components.converters.image_to_document.ImageToDocument",
                "component_id": "ImageToDocument_1739377283742",
                "intro": "ImageToDocument",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {},
            },
            {
                "name": "MagicPDFToDocument",
                "type": "byoa.integrations.components.converters.magic_pdf_to_document.MagicPDFToDocument",
                "component_id": "MagicPDFToDocument_1739377283742",
                "intro": "MagicPDFToDocument",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {},
            },
            {
                "name": "MarkdownToDocument",
                "type": "haystack.components.converters.markdown.MarkdownToDocument",
                "component_id": "MarkdownToDocument_1739377283742",
                "intro": "MarkdownToDocument",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "progress_bar": False,
                    "table_to_single_line": False,
                },
            },
            {
                "name": "MetadataRouter",
                "type": "haystack.components.routers.metadata_router.MetadataRouter",
                "component_id": "MetadataRouter_1739377283742",
                "intro": "MetadataRouter",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {
                    "rules": {
                        "image": {
                            "conditions": [
                                {
                                    "field": "meta.content_type",
                                    "operator": "==",
                                    "value": "image",
                                },
                            ],
                            "operator": "AND",
                        },
                        "text": {
                            "conditions": [
                                {
                                    "field": "meta.content_type",
                                    "operator": "==",
                                    "value": "text",
                                },
                            ],
                            "operator": "AND",
                        },
                    },
                },
            },
            {
                "name": "PythonExecutor",
                "component_id": "PythonExecutor_1739377283742",
                "intro": "PythonExecutor",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {"python_code": ""},
                "type": "byoa.integrations.components.python_executor.PythonExecutor",
            },
            {
                "name": "TextFileToDocument",
                "type": "haystack.components.converters.txt.TextFileToDocument",
                "component_id": "TextFileToDocument_1739377283742",
                "intro": "TextFileToDocument",
                "position": {"x": 0, "y": 0},
                "input_keys": {},
                "output_keys": {},
                "init_parameters": {"encoding": "utf8"},
            },
        ],
        # Each entry wires one component output ("sender") to one component
        # input ("receiver"), both written as "<component name>.<socket>".
        "connections": [
            {"receiver": "TextFileToDocument.sources", "sender": "FileRouterComponent.text/plain"},
            {"receiver": "MarkdownToDocument.sources", "sender": "FileRouterComponent.text/markdown"},
            {"receiver": "ImageToDocument.sources", "sender": "FileRouterComponent.image/.*"},
            {"receiver": "MagicPDFToDocument.sources", "sender": "FileRouterComponent.application/pdf"},
            {"receiver": "DocumentJoiner.documents", "sender": "TextFileToDocument.documents"},
            {"receiver": "DocumentJoiner.documents", "sender": "MarkdownToDocument.documents"},
            {"receiver": "DocumentJoiner.documents", "sender": "MagicPDFToDocument.documents"},
            {"receiver": "DocumentJoiner.documents", "sender": "ImageToDocument.documents"},
            {"receiver": "MetadataRouter.documents", "sender": "DocumentJoiner.documents"},
            {"receiver": "DocumentCleaner.documents", "sender": "MetadataRouter.text"},
            {"receiver": "ImageOCRToDocument.documents", "sender": "MetadataRouter.image"},
            {"receiver": "ImageCaptionToDocument.documents", "sender": "MetadataRouter.image"},
            {"receiver": "DocumentSplitter.documents", "sender": "DocumentCleaner.documents"},
            {"receiver": "DocumentJoiner-Result.documents", "sender": "DocumentSplitter.documents"},
            {"receiver": "DocumentCleaner-ImageOCR.documents", "sender": "ImageOCRToDocument.documents"},
            {"receiver": "DocumentJoiner-Result.documents", "sender": "DocumentCleaner-ImageOCR.documents"},
            {"receiver": "DocumentCleaner-ImageCaption.documents", "sender": "ImageCaptionToDocument.documents"},
            {"receiver": "DocumentJoiner-Result.documents", "sender": "DocumentCleaner-ImageCaption.documents"},
            {"receiver": "PythonExecutor.documents", "sender": "DocumentJoiner-Result.documents"},
            {"receiver": "DocumentEmbedder.documents", "sender": "PythonExecutor.documents"},
            {"receiver": "DocumentWriter.documents", "sender": "DocumentEmbedder.documents"},
        ],
        "edges": [],
        "extra_components": [],
    },
}

# Send the create-workflow request. requests applies no timeout unless one is
# given, so bound the wait (in seconds) instead of risking an indefinite hang.
response = requests.post(url, json=body, headers=headers, timeout=30)

print(response.json())

Return:

{'code': 'ok', 'msg': 'ok', 'data': None}

View workflow list

 GET /byoa/api/v1/index_workflow

Example:

import requests
import json
# Endpoint for listing index workflows (GET).
url = (
    "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech"
    "/byoa/api/v1/index_workflow"
)
# Request headers. The values containing "xxxx" are redacted sample values;
# substitute the ones for your own account.
headers = {
    "user-id": "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
    "Access-Token": "xxxx",
    "uid": (
        "d252447b-7f1d-4fd4-8b70-9bc2dd5cd505"
        "-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
    ),
}

# Fetch the workflow list. requests applies no timeout unless one is given,
# so bound the wait (in seconds) instead of risking an indefinite hang.
response = requests.get(url, headers=headers, timeout=30)

# ensure_ascii=False keeps any non-ASCII text in the response readable.
print("Response Body:", json.dumps(response.json(), indent=4, ensure_ascii=False))

Return:

Response Body: {
    "code": "ok",
    "msg": "ok",
    "data": {
        "total": 1,
        "workflows": [
            {
                "id": "a029a904-1e1c-41af-b361-b6578c92a437",
                "job_meta_id": "53d99056-b7b8-4d9e-baf1-3d3bca24d4f3",
                "name": "wf-1",
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "source_volume_names": [
                    "b-vol1"
                ],
                "file_types": [
                    2
                ],
                "created_at": 1739377287000,
                "creator": "admin",
                "updated_at": 1739377755000,
                "modifier": "admin",
                "target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
                "target_volume_name": "a-vol1",
                "process_mode": {
                    "interval": 0,
                    "offset": 0
                },
                "status": 2
            }
        ]
    }
}

View workflow details

GET /byoa/api/v1/index_workflow/{workflow_id}

Output parameters:

Parameter Meaning
id Workflow id
name Workflow name
job_meta_id Job metadata id
source_volume_ids Source volume id list
source_volume_names Source volume name list
file_types File type list
created_at Created at
creator Creator
updated_at Updated at
modifier Updater
target_volume_id Target volume id
target_volume_name Target volume name
process_mode Processing mode
status Status 1: Running; 2: Completed; 3: Stopped
workflow Workflow

Example:

import requests
import json
# Endpoint for one workflow's details (GET); the last path segment is the
# workflow id.
url = (
    "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech"
    "/byoa/api/v1/index_workflow"
    "/ff5d119a-4e94-4968-ac0c-6ef64fcabb6c"
)
# Request headers. The values containing "xxxx" are redacted sample values;
# substitute the ones for your own account.
headers = {
    "user-id": "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
    "Access-Token": "xxxx",
    "uid": (
        "181c0bfb-486f-4e55-a4ea-7fa2a5dae4fa"
        "-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
    ),
}
# Fetch the workflow details. requests applies no timeout unless one is given,
# so bound the wait (in seconds) instead of risking an indefinite hang.
response = requests.get(url, headers=headers, timeout=30)

# ensure_ascii=False keeps any non-ASCII text in the response readable.
print("Response Body:", json.dumps(response.json(), indent=4, ensure_ascii=False))

Return:

Response Body: {
    "code": "ok",
    "msg": "ok",
    "data": {
        "id": "ff5d119a-4e94-4968-ac0c-6ef64fcabb6c",
        "name": "test-2",
        "job_meta_id": "edd6ffc3-5c96-4a1b-a6ef-01d21fdbb6d0",
        "source_volume_ids": [
            "1889223879880048640"
        ],
        "source_volume_names": [
            "b-vol1"
        ],
        "file_types": [
            2
        ],
        "created_at": 1739435482000,
        "creator": "admin",
        "updated_at": 1739436347000,
        "modifier": "admin",
        "target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
        "target_volume_name": "a-vol2",
        "process_mode": {
            "interval": 5,
            "offset": 0
        },
        "status": 1,
        "workflow": {
            "components": [
                {
                    "name": "DocumentCleaner",
                    "type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                    "component_id": "DocumentCleaner_1739435478121",
                    "intro": "DocumentCleaner",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "ascii_only": false,
                        "keep_id": false,
                        "remove_empty_lines": false,
                        "remove_extra_whitespaces": false,
                        "remove_regex": null,
                        "remove_repeated_substrings": false,
                        "remove_substrings": null,
                        "unicode_normalization": null
                    }
                },
                {
                    "name": "DocumentCleaner-ImageCaption",
                    "type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                    "component_id": "DocumentCleaner-ImageCaption_1739435478121",
                    "intro": "DocumentCleaner-ImageCaption",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},"output_keys": {},
                    "init_parameters": {
                        "ascii_only": false,
                        "keep_id": false,
                        "remove_empty_lines": false,
                        "remove_extra_whitespaces": false,
                        "remove_regex": null,
                        "remove_repeated_substrings": false,
                        "remove_substrings": null,
                        "unicode_normalization": null
                    }
                },
                {
                    "name": "DocumentCleaner-ImageOCR",
                    "type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                    "component_id": "DocumentCleaner-ImageOCR_1739435478121",
                    "intro": "DocumentCleaner-ImageOCR",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "ascii_only": false,
                        "keep_id": false,
                        "remove_empty_lines": false,
                        "remove_extra_whitespaces": false,
                        "remove_regex": null,
                        "remove_repeated_substrings": false,
                        "remove_substrings": null,
                        "unicode_normalization": null
                    }
                },
                {
                    "name": "DocumentEmbedder",
                    "type": "haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder",
                    "component_id": "DocumentEmbedder_1739435478121",
                    "intro": "DocumentEmbedder",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "api_base_url": "https://api.siliconflow.cn/v1",
                        "api_key": {
                            "env_vars": [
                                "OPENAI_API_KEY"
                            ],
                            "strict": true,
                            "type": "env_var"
                        },
                        "batch_size": 32,
                        "dimensions": null,
                        "embedding_separator": "\n",
                        "meta_fields_to_embed": [],
                        "model": "BAAI/bge-m3",
                        "organization": null,
                        "prefix": "",
                        "progress_bar": true,
                        "suffix": ""
                    }
                },
                {
                    "name": "DocumentJoiner",
                    "type": "haystack.components.joiners.document_joiner.DocumentJoiner",
                    "component_id": "DocumentJoiner_1739435478121",
                    "intro": "DocumentJoiner",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "join_mode": "concatenate",
                        "sort_by_score": true,
                        "top_k": null,
                        "weights": null
                    }
                },
                {
                    "name": "DocumentJoiner-Result",
                    "type": "haystack.components.joiners.document_joiner.DocumentJoiner",
                    "component_id": "DocumentJoiner-Result_1739435478121",
                    "intro": "DocumentJoiner-Result",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "join_mode": "concatenate",
                        "sort_by_score": true,
                        "top_k": null,
                        "weights": null
                    }
                },
                {
                    "name": "DocumentSplitter",
                    "type": "haystack.components.preprocessors.document_splitter.DocumentSplitter",
                    "component_id": "DocumentSplitter_1739435478121",
                    "intro": "DocumentSplitter",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},"output_keys": {},
                    "init_parameters": {
                        "split_by": "word",
                        "split_length": 800,
                        "split_overlap": 200,
                        "split_threshold": 0
                    }
                },
                {
                    "name": "DocumentWriter",
                    "type": "haystack.components.writers.document_writer.DocumentWriter",
                    "component_id": "DocumentWriter_1739435478121",
                    "intro": "DocumentWriter",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "document_store": {
                            "init_parameters": {
                                "connection_string": {
                                    "env_vars": [
                                        "DATABASE_SYNC_URI"
                                    ],
                                    "strict": true,
                                    "type": "env_var"
                                },
                                "embedding_dimension": 1024,
                                "keyword_index_name": "haystack_keyword_index",
                                "recreate_table": true,
                                "table_name": "embedding_results",
                                "vector_function": "cosine_similarity"
                            },
                            "type": "byoa.integrations.document_stores.mo_document_store.MOIDocumentStore"
                        },
                        "policy": "NONE"
                    }
                },
                {
                    "name": "FileRouterComponent",
                    "type": "haystack.components.routers.file_type_router.FileTypeRouter",
                    "component_id": "FileRouterComponent_1739435478121",
                    "intro": "FileRouterComponent",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "additional_mimetypes": null,
                        "mime_types": [
                            "text/plain",
                            "text/markdown",
                            "image/.*",
                            "application/pdf"
                        ]
                    }
                },
                {
                    "name": "ImageCaptionToDocument",
                    "type": "byoa.integrations.components.converters.image_caption_to_document.ImageCaptionToDocument",
                    "component_id": "ImageCaptionToDocument_1739435478121",
                    "intro": "ImageCaptionToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {}
                },
                {
                    "name": "ImageOCRToDocument",
                    "type": "byoa.integrations.components.converters.image_ocr_to_document.ImageOCRToDocument",
                    "component_id": "ImageOCRToDocument_1739435478121",
                    "intro": "ImageOCRToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "model": "ucaslcl/GOT-OCR2_0",
                        "tokenizer": "stepfun-ai/GOT-OCR2_0"
                    }
                },
                {
                    "name": "ImageToDocument",
                    "type": "byoa.integrations.components.converters.image_to_document.ImageToDocument",
                    "component_id": "ImageToDocument_1739435478121",
                    "intro": "ImageToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {}
                },
                {
                    "name": "MagicPDFToDocument",
                    "type": "byoa.integrations.components.converters.magic_pdf_to_document.MagicPDFToDocument",
                    "component_id": "MagicPDFToDocument_1739435478121","intro": "MagicPDFToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {}
                },
                {
                    "name": "MarkdownToDocument",
                    "type": "haystack.components.converters.markdown.MarkdownToDocument",
                    "component_id": "MarkdownToDocument_1739435478121",
                    "intro": "MarkdownToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "progress_bar": false,
                        "table_to_single_line": false
                    }
                },
                {
                    "name": "MetadataRouter",
                    "type": "haystack.components.routers.metadata_router.MetadataRouter",
                    "component_id": "MetadataRouter_1739435478121",
                    "intro": "MetadataRouter",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "rules": {
                            "image": {
                                "conditions": [
                                    {
                                        "field": "meta.content_type",
                                        "operator": "==",
                                        "value": "image"
                                    }
                                ],
                                "operator": "AND"
                            },
                            "text": {
                                "conditions": [
                                    {
                                        "field": "meta.content_type",
                                        "operator": "==",
                                        "value": "text"
                                    }
                                ],
                                "operator": "AND"
                            }
                        }
                    }
                },
                {
                    "name": "PythonExecutor",
                    "component_id": "PythonExecutor_1739435478121",
                    "intro": "PythonExecutor",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "python_code": ""
                    },
                    "type": "byoa.integrations.components.python_executor.PythonExecutor"
                },
                {
                    "name": "TextFileToDocument",
                    "type": "haystack.components.converters.txt.TextFileToDocument",
                    "component_id": "TextFileToDocument_1739435478121",
                    "intro": "TextFileToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "encoding": "utf8"
                    }
                }
            ],
            "connections": [
                {
                    "receiver": "TextFileToDocument.sources",
                    "sender": "FileRouterComponent.text/plain"
                },
                {
                    "receiver": "MarkdownToDocument.sources",
                    "sender": "FileRouterComponent.text/markdown"
                },
                {
                    "receiver": "ImageToDocument.sources",
                    "sender": "FileRouterComponent.image/.*"
                },
                {
                    "receiver": "MagicPDFToDocument.sources",
                    "sender": "FileRouterComponent.application/pdf"
                },
                {
                    "receiver": "DocumentJoiner.documents",
                    "sender": "TextFileToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner.documents",
                    "sender": "MarkdownToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner.documents","sender": "MagicPDFToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner.documents",
                    "sender": "ImageToDocument.documents"
                },
                {
                    "receiver": "MetadataRouter.documents",
                    "sender": "DocumentJoiner.documents"
                },
                {
                    "receiver": "DocumentCleaner.documents",
                    "sender": "MetadataRouter.text"
                },
                {
                    "receiver": "ImageOCRToDocument.documents",
                    "sender": "MetadataRouter.image"
                },
                {
                    "receiver": "ImageCaptionToDocument.documents",
                    "sender": "MetadataRouter.image"
                },
                {
                    "receiver": "DocumentSplitter.documents",
                    "sender": "DocumentCleaner.documents"
                },
                {
                    "receiver": "DocumentJoiner-Result.documents",
                    "sender": "DocumentSplitter.documents"
                },
                {
                    "receiver": "DocumentCleaner-ImageOCR.documents",
                    "sender": "ImageOCRToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner-Result.documents",
                    "sender": "DocumentCleaner-ImageOCR.documents"
                },
                {
                    "receiver": "DocumentCleaner-ImageCaption.documents",
                    "sender": "ImageCaptionToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner-Result.documents",
                    "sender": "DocumentCleaner-ImageCaption.documents"
                },
                {
                    "receiver": "PythonExecutor.documents",
                    "sender": "DocumentJoiner-Result.documents"
                },
                {
                    "receiver": "DocumentEmbedder.documents",
                    "sender": "PythonExecutor.documents"
                },
                {
                    "receiver": "DocumentWriter.documents",
                    "sender": "DocumentEmbedder.documents"
                }
            ],
            "edges": [],
            "extra_components": []
        }
    }
}

Modify Workflow

PUT /byoa/api/v1/index_workflow/{workflow_id}

For input parameters, refer to the "Create a workflow" section above.

Example:

import requests
import json

# Endpoint of the workflow to modify; the last path segment is the workflow id.
url = (
    "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech"
    "/byoa/api/v1/index_workflow/fef28ca2-175e-4de9-9ac3-f4aa0da5a745"
)

# Headers sent with the request. The values are masked placeholders
# (note the "xxxx"); substitute your own before running the example.
headers = {
    "user-id": "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
    "Access-Token": "xxxx",
    "uid": (
        "fa9f114e-77e0-4c23-aa0f-e982a5ec80e2-"
        "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
    ),
}

# Request payload for the modify call: workflow name, source/target volumes,
# processing schedule, accepted file types, and the pipeline definition
# ("workflow": its components plus the connections between them).
# NOTE(review): boolean and null settings below are written as the strings
# "true"/"false"/"null" rather than Python True/False/None - confirm that the
# API really expects strings here.
body = {
    "name":"wf-3",
    "source_volume_names":[
        "b-vol1"
    ],
    "source_volume_ids":[
        "1889223879880048640"
    ],
    "target_volume_name":"a-vol1",
    "target_volume_id":"eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
    "create_target_volume_name":"",
    # interval 0 = one-time processing (see the process_mode input parameter).
    "process_mode":{
        "interval":0,
        "offset":0
    },
    # 2 = pdf, the only file type currently supported.
    "file_types":[
        2
    ],
    "workflow":{
        # One entry per pipeline component: its name, implementing class
        # ("type"), id, and constructor settings ("init_parameters").
        "components":[
            {
                "name":"DocumentCleaner",
                "type":"haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                "component_id":"DocumentCleaner_1739377283742",
                "intro":"DocumentCleaner",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "ascii_only":"false",
                    "keep_id":"false",
                    "remove_empty_lines":"true",
                    "remove_extra_whitespaces":"true",
                    # Pattern matches e-mail addresses and http(s) URLs.
                    "remove_regex":"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
                    "remove_repeated_substrings":"false",
                    "remove_substrings":"null",
                    "unicode_normalization":"null"
                }
            },
            {
                "name":"DocumentCleaner-ImageCaption",
                "type":"haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                "component_id":"DocumentCleaner-ImageCaption_1739377283742",
                "intro":"DocumentCleaner-ImageCaption",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{},
                "init_parameters":{
                    "ascii_only":"false",
                    "keep_id":"false",
                    "remove_empty_lines":"true",
                    "remove_extra_whitespaces":"true",
                    "remove_regex":"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
                    "remove_repeated_substrings":"false",
                    "remove_substrings":"null",
                    "unicode_normalization":"null"
                }
            },
            {
                "name":"DocumentCleaner-ImageOCR",
                "type":"haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                "component_id":"DocumentCleaner-ImageOCR_1739377283742",
                "intro":"DocumentCleaner-ImageOCR",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "ascii_only":"false",
                    "keep_id":"false",
                    "remove_empty_lines":"true",
                    "remove_extra_whitespaces":"true",
                    "remove_regex":"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
                    "remove_repeated_substrings":"false",
                    "remove_substrings":"null",
                    "unicode_normalization":"null"
                }
            },
            {
                "name":"DocumentEmbedder",
                "type":"haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder",
                "component_id":"DocumentEmbedder_1739377283742",
                "intro":"DocumentEmbedder",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "api_base_url":"https://api.siliconflow.cn/v1",
                    "api_key":{
                        "env_vars":[
                            "OPENAI_API_KEY"
                        ],
                        "strict":"true",
                        "type":"env_var"
                    },
                    "batch_size":32,
                    "dimensions":"null",
                    "embedding_separator":"\n",
                    "meta_fields_to_embed":[

                    ],
                    "model":"BAAI/bge-m3",
                    "organization":"null",
                    "prefix":"",
                    "progress_bar":"true",
                    "suffix":""
                }
            },
            {
                "name":"DocumentJoiner",
                "type":"haystack.components.joiners.document_joiner.DocumentJoiner",
                "component_id":"DocumentJoiner_1739377283742",
                "intro":"DocumentJoiner",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "join_mode":"concatenate",
                    "sort_by_score":"true",
                    "top_k":"null",
                    "weights":"null"
                }
            },
            {
                "name":"DocumentJoiner-Result",
                "type":"haystack.components.joiners.document_joiner.DocumentJoiner",
                "component_id":"DocumentJoiner-Result_1739377283742",
                "intro":"DocumentJoiner-Result",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "join_mode":"concatenate",
                    "sort_by_score":"true",
                    "top_k":"null",
                    "weights":"null"
                }
            },
            {
                "name":"DocumentSplitter",
                "type":"haystack.components.preprocessors.document_splitter.DocumentSplitter",
                "component_id":"DocumentSplitter_1739377283742",
                "intro":"DocumentSplitter",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "split_by":"word",
                    # split_length is the maximum segment length; the allowed
                    # range is 100 to 2000 (see the workflow input parameter).
                    "split_length":800,
                    "split_overlap":200,"split_threshold":0
                }
            },
            {
                "name":"DocumentWriter",
                "type":"haystack.components.writers.document_writer.DocumentWriter",
                "component_id":"DocumentWriter_1739377283742",
                "intro":"DocumentWriter",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "document_store":{
                        "init_parameters":{
                            "connection_string":{
                                "env_vars":[
                                    "DATABASE_SYNC_URI"
                                ],
                                "strict":"true",
                                "type":"env_var"
                            },
                            "embedding_dimension":1024,
                            "keyword_index_name":"haystack_keyword_index",
                            "recreate_table":"true",
                            "table_name":"embedding_results",
                            "vector_function":"cosine_similarity"
                        },
                        "type":"byoa.integrations.document_stores.mo_document_store.MOIDocumentStore"
                    },
                    "policy":"NONE"
                }
            },
            {
                "name":"FileRouterComponent",
                "type":"haystack.components.routers.file_type_router.FileTypeRouter",
                "component_id":"FileRouterComponent_1739377283742",
                "intro":"FileRouterComponent",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "additional_mimetypes":"null",
                    "mime_types":[
                        "text/plain",
                        "text/markdown",
                        "image/.*",
                        "application/pdf"
                    ]
                }
            },
            {
                "name":"ImageCaptionToDocument",
                "type":"byoa.integrations.components.converters.image_caption_to_document.ImageCaptionToDocument",
                "component_id":"ImageCaptionToDocument_1739377283742",
                "intro":"ImageCaptionToDocument",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{

                }
            },
            {
                "name":"ImageOCRToDocument",
                "type":"byoa.integrations.components.converters.image_ocr_to_document.ImageOCRToDocument",
                "component_id":"ImageOCRToDocument_1739377283742",
                "intro":"ImageOCRToDocument",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "model":"ucaslcl/GOT-OCR2_0",
                    "tokenizer":"stepfun-ai/GOT-OCR2_0"
                }
            },
            {
                "name":"ImageToDocument",
                "type":"byoa.integrations.components.converters.image_to_document.ImageToDocument",
                "component_id":"ImageToDocument_1739377283742",
                "intro":"ImageToDocument",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{

                }
            },
            {
                "name":"MagicPDFToDocument",
                "type":"byoa.integrations.components.converters.magic_pdf_to_document.MagicPDFToDocument",
                "component_id":"MagicPDFToDocument_1739377283742",
                "intro":"MagicPDFToDocument",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{

                }
            },
            {
                "name":"MarkdownToDocument",
                "type":"haystack.components.converters.markdown.MarkdownToDocument","component_id":"MarkdownToDocument_1739377283742",
                "intro":"MarkdownToDocument",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "progress_bar":"false",
                    "table_to_single_line":"false"
                }
            },
            {
                "name":"MetadataRouter",
                "type":"haystack.components.routers.metadata_router.MetadataRouter",
                "component_id":"MetadataRouter_1739377283742",
                "intro":"MetadataRouter",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    # Two rules, "image" and "text", each testing
                    # meta.content_type for equality with the rule's name.
                    "rules":{
                        "image":{
                            "conditions":[
                                {
                                    "field":"meta.content_type",
                                    "operator":"==",
                                    "value":"image"
                                }
                            ],
                            "operator":"AND"
                        },
                        "text":{
                            "conditions":[
                                {
                                    "field":"meta.content_type",
                                    "operator":"==",
                                    "value":"text"
                                }
                            ],
                            "operator":"AND"
                        }
                    }
                }
            },
            {
                "name":"PythonExecutor",
                "component_id":"PythonExecutor_1739377283742",
                "intro":"PythonExecutor",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "python_code":""
                },
                "type":"byoa.integrations.components.python_executor.PythonExecutor"
            },
            {
                "name":"TextFileToDocument",
                "type":"haystack.components.converters.txt.TextFileToDocument",
                "component_id":"TextFileToDocument_1739377283742",
                "intro":"TextFileToDocument",
                "position":{
                    "x":0,
                    "y":0
                },
                "input_keys":{

                },
                "output_keys":{

                },
                "init_parameters":{
                    "encoding":"utf8"
                }
            }
        ],
        # Links between components, written as "<component name>.<socket>".
        # Presumably each "sender" output feeds the paired "receiver" input -
        # verify against the pipeline documentation.
        "connections":[
            {
                "receiver":"TextFileToDocument.sources",
                "sender":"FileRouterComponent.text/plain"
            },
            {
                "receiver":"MarkdownToDocument.sources",
                "sender":"FileRouterComponent.text/markdown"
            },
            {
                "receiver":"ImageToDocument.sources",
                "sender":"FileRouterComponent.image/.*"
            },
            {
                "receiver":"MagicPDFToDocument.sources",
                "sender":"FileRouterComponent.application/pdf"
            },
            {
                "receiver":"DocumentJoiner.documents",
                "sender":"TextFileToDocument.documents"
            },
            {
                "receiver":"DocumentJoiner.documents",
                "sender":"MarkdownToDocument.documents"
            },
            {
                "receiver":"DocumentJoiner.documents",
                "sender":"MagicPDFToDocument.documents"
            },
            {
                "receiver":"DocumentJoiner.documents",
                "sender":"ImageToDocument.documents"
            },
            {
                "receiver":"MetadataRouter.documents",
                "sender":"DocumentJoiner.documents"
            },
            {
                "receiver":"DocumentCleaner.documents",
                "sender":"MetadataRouter.text"
            },
            {
                "receiver":"ImageOCRToDocument.documents",
                "sender":"MetadataRouter.image"
            },
            {
                "receiver":"ImageCaptionToDocument.documents",
                "sender":"MetadataRouter.image"
            },
            {
                "receiver":"DocumentSplitter.documents","sender":"DocumentCleaner.documents"
            },
            {
                "receiver":"DocumentJoiner-Result.documents",
                "sender":"DocumentSplitter.documents"
            },
            {
                "receiver":"DocumentCleaner-ImageOCR.documents",
                "sender":"ImageOCRToDocument.documents"
            },
            {
                "receiver":"DocumentJoiner-Result.documents",
                "sender":"DocumentCleaner-ImageOCR.documents"
            },
            {
                "receiver":"DocumentCleaner-ImageCaption.documents",
                "sender":"ImageCaptionToDocument.documents"
            },
            {
                "receiver":"DocumentJoiner-Result.documents",
                "sender":"DocumentCleaner-ImageCaption.documents"
            },
            {
                "receiver":"PythonExecutor.documents",
                "sender":"DocumentJoiner-Result.documents"
            },
            {
                "receiver":"DocumentEmbedder.documents",
                "sender":"PythonExecutor.documents"
            },
            {
                "receiver":"DocumentWriter.documents",
                "sender":"DocumentEmbedder.documents"
            }
        ],
        "edges":[

        ],
        "extra_components":[

        ]
    }
}

# Send the complete workflow definition to the modify endpoint.
response = requests.put(url, json=body, headers=headers)

# Decode the body only on success: response.json() raises when the body is
# not valid JSON, so failures are reported from the raw status and text.
if response.status_code == 200:
    print(response.json())
else:
    print(f"Request failed, status code: {response.status_code}, error message: {response.text}")

Return:

{'code': 'ok', 'msg': 'ok', 'data': None}

Delete Workflow

DELETE /byoa/api/v1/index_workflow/{workflow_id}?[delete_data=true]

Example:

import requests
import json

# Endpoint of the workflow to delete: the workflow id is the last path
# segment, followed by the optional delete_data=true query parameter.
url = (
    "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech"
    "/byoa/api/v1/index_workflow/729e7a03-652d-46e0-bdad-b05ec5b80cea"
    "?delete_data=true"
)

# Headers sent with the request. The values are masked placeholders
# (note the "xxxx"); substitute your own before running the example.
headers = {
    "user-id": "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
    "Access-Token": "xxxx",
    "uid": (
        "011d4b66-ace5-4d58-88a4-bc76719acda5-"
        "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
    ),
}

# Issue the delete request for the workflow addressed by `url`.
response = requests.delete(url, headers=headers)

# Report failures from the raw status and text; print the decoded JSON
# body only when the server answered 200.
if response.status_code != 200:
    print(f"请求失败,状态码:{response.status_code}, 错误信息:{response.text}")
else:
    print(response.json())

Return:

{'code': 'ok', 'msg': 'ok', 'data': None}

Job

View the job list

GET /byoa/api/v1/index_workflow_job

Output parameters:

Parameter Meaning
id Job id
workflow_name Workflow name
name Connector name
source_volume_names Original volume name
source_volume_ids Original volume id
target_volume_name Target volume name
target_volume_id Target volume id
file_types File type, 2 is pdf format
status Workflow status
workflow_id Workflow id

Example:

import requests
import json

# Job-list endpoint; the GET request is sent with headers only.
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow_job"
headers = {
    "user-id": "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
    "Access-Token": "xxxx",
    "uid": "011d4b66-ace5-4d58-88a4-bc76719acda5-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin",
}
response = requests.get(url, headers=headers)

# Decode the JSON reply, then pretty-print it with a 4-space indent and
# non-ASCII characters left unescaped.
payload = response.json()
pretty = json.dumps(payload, indent=4, ensure_ascii=False)
print("Response Body:", pretty)

Return:

Response Body: {
    "code": "ok",
    "msg": "ok",
    "data": {
        "total": 10,
        "jobs": [
            {
                "id": "0194fb2c-f5c5-7d42-8d09-fdc3d8414777",
                "workflow_name": "wf-2",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol2",
                "target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-12T17:20:15.000000+0000",
                "end_time": "2025-02-12T17:20:15.000000+0000",
                "duration": 0,
                "processed_count": 0,
                "total_count": 0,
                "status": 2,
                "workflow_id": "729e7a03-652d-46e0-bdad-b05ec5b80cea",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            },
            {
                "id": "0194fb28-61ae-7aac-8500-a4c924a68211",
                "workflow_name": "wf-4",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol2",
                "target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-12T17:15:15.000000+0000",
                "end_time": "2025-02-12T17:21:17.000000+0000",
                "duration": 362,
                "processed_count": 1,
                "total_count": 1,
                "status": 2,
                "workflow_id": "c6dcbad5-f85d-42b7-942c-2e8d3445a4e6",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            },
            {
                "id": "0194fb28-61af-708a-915e-6f140a2424fe",
                "workflow_name": "wf-2",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol2",
                "target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-12T17:15:15.000000+0000",
                "end_time": "2025-02-12T17:15:15.000000+0000",
                "duration": 0,
                "processed_count": 0,
                "total_count": 0,
                "status": 2,
                "workflow_id": "729e7a03-652d-46e0-bdad-b05ec5b80cea",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            },
            {
                "id": "0194fb23-cd65-767c-b58f-db7c4456b896",
                "workflow_name": "wf-2",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol2",
                "target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-12T17:10:15.000000+0000",
                "end_time": "2025-02-12T17:16:17.000000+0000",
                "duration": 362,
                "processed_count": 1,
                "total_count": 1,
                "status": 2,
                "workflow_id": "729e7a03-652d-46e0-bdad-b05ec5b80cea",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            },
            {
                "id": "0194fb06-8044-70e4-8a54-16f5a0e3c720",
                "workflow_name": "wf-3",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol1",
                "target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-12T16:38:15.000000+0000",
                "end_time": "2025-02-12T16:39:16.000000+0000",
                "duration": 61,
                "processed_count": 1,
                "total_count": 1,
                "status": 3,
                "workflow_id": "2c0be55b-af55-4787-baac-3d8e7d987fe7",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            },
            {
                "id": "0194faf7-d9a0-7347-8726-a86f52cf67c7",
                "workflow_name": "wf-1",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol1",
                "target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-12T16:22:15.000000+0000",
                "end_time": "2025-02-12T16:28:16.000000+0000",
                "duration": 361,
                "processed_count": 1,
                "total_count": 1,
                "status": 2,
                "workflow_id": "a029a904-1e1c-41af-b361-b6578c92a437",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            },
            {
                "id": "0194f423-c2a7-7cc5-87ce-97fa942ac6ce",
                "workflow_name": "wk-3",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol1",
                "target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-11T08:32:52.000000+0000",
                "end_time": "2025-02-11T08:38:52.000000+0000",
                "duration": 360,
                "processed_count": 1,
                "total_count": 1,
                "status": 2,
                "workflow_id": "4f209aa9-186c-442a-b324-d7eebaca4cd0",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            },
            {
                "id": "0194f41d-59d3-7899-9fa8-24343214df7f",
                "workflow_name": "wf-3",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol1",
                "target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-11T08:25:52.000000+0000",
                "end_time": "2025-02-11T08:31:52.000000+0000",
                "duration": 360,
                "processed_count": 1,
                "total_count": 1,
                "status": 2,
                "workflow_id": "ea64f8ba-b984-46a3-acb0-628849538244",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            },
            {
                "id": "0194f40f-9da9-7927-890c-4bea252e0235",
                "workflow_name": "wf-2",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol1",
                "target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-11T08:10:52.000000+0000",
                "end_time": "2025-02-11T08:10:52.000000+0000",
                "duration": 0,
                "processed_count": 0,
                "total_count": 0,
                "status": 2,
                "workflow_id": "d2842368-37dc-4b49-930a-25f16a8fc0c8",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            },
            {
                "id": "0194f40c-deb5-7466-bd7f-7c930a034bcd",
                "workflow_name": "wf-1",
                "source_volume_names": [
                    "b-vol1"
                ],
                "source_volume_ids": [
                    "1889223879880048640"
                ],
                "target_volume_name": "a-vol1",
                "target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
                "file_types": [
                    2
                ],
                "start_time": "2025-02-11T08:07:52.000000+0000",
                "end_time": "2025-02-11T08:07:52.000000+0000",
                "duration": 0,
                "processed_count": 0,
                "total_count": 0,
                "status": 2,
                "workflow_id": "f6c0b040-5403-42b9-a914-bbf2935d69f0",
                "workflow": {
                    "components": null,
                    "connections": null,
                    "edges": null,
                    "extra_components": null
                }
            }
        ]
    }
}

View job details

GET /byoa/api/v1/index_workflow_job/{job_id}

Example:

import requests
import json

# Job-details endpoint; the job id is the last path segment.
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow_job/0194fb2c-f5c5-7d42-8d09-fdc3d8414777"
headers = {
    "user-id": "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
    "Access-Token": "xxxx",
    "uid": "a6e11303-f4fd-46c0-b5ff-c774e96f64a3-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin",
}
response = requests.get(url, headers=headers)

# Decode the JSON reply, then pretty-print it with a 4-space indent and
# non-ASCII characters left unescaped.
payload = response.json()
pretty = json.dumps(payload, indent=4, ensure_ascii=False)
print("Response Body:", pretty)

Return:

Response Body: {
    "code": "ok",
    "msg": "ok",
    "data": {
        "id": "0194fb2c-f5c5-7d42-8d09-fdc3d8414777",
        "workflow_name": "wf-2",
        "source_volume_names": [
            "b-vol1"
        ],
        "source_volume_ids": [
            "1889223879880048640"
        ],
        "target_volume_name": "a-vol2",
        "target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
        "file_types": [
            2
        ],
        "start_time": "2025-02-12T17:20:15.000000+0000",
        "end_time": "2025-02-12T17:20:15.000000+0000",
        "duration": 0,
        "processed_count": 0,
        "total_count": 0,
        "status": 2,
        "workflow_id": "729e7a03-652d-46e0-bdad-b05ec5b80cea",
        "workflow": {
            "components": [
                {
                    "name": "DocumentCleaner",
                    "type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                    "component_id": "DocumentCleaner_1739380168023",
                    "intro": "DocumentCleaner",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "ascii_only": false,
                        "keep_id": false,
                        "remove_empty_lines": true,
                        "remove_extra_whitespaces": true,
                        "remove_regex": null,
                        "remove_repeated_substrings": false,
                        "remove_substrings": null,
                        "unicode_normalization": null
                    }
                },
                {
                    "name": "DocumentCleaner-ImageCaption",
                    "type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                    "component_id": "DocumentCleaner-ImageCaption_1739380168023",
                    "intro": "DocumentCleaner-ImageCaption",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "ascii_only": false,
                        "keep_id": false,
                        "remove_empty_lines": true,
                        "remove_extra_whitespaces": true,
                        "remove_regex": null,
                        "remove_repeated_substrings": false,
                        "remove_substrings": null,
                        "unicode_normalization": null
                    }
                },
                {
                    "name": "DocumentCleaner-ImageOCR",
                    "type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
                    "component_id": "DocumentCleaner-ImageOCR_1739380168023",
                    "intro": "DocumentCleaner-ImageOCR",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "ascii_only": false,
                        "keep_id": false,
                        "remove_empty_lines": true,
                        "remove_extra_whitespaces": true,
                        "remove_regex": null,
                        "remove_repeated_substrings": false,
                        "remove_substrings": null,
                        "unicode_normalization": null
                    }
                },
                {
                    "name": "DocumentEmbedder",
                    "type": "haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder",
                    "component_id": "DocumentEmbedder_1739380168023",
                    "intro": "DocumentEmbedder",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "api_base_url": "https://api.siliconflow.cn/v1",
                        "api_key": {
                            "env_vars": [
                                "OPENAI_API_KEY"
                            ],
                            "strict": true,
                            "type": "env_var"
                        },
                        "batch_size": 32,
                        "dimensions": null,
                        "embedding_separator": "\n",
                        "meta_fields_to_embed": [],
                        "model": "BAAI/bge-m3",
                        "organization": null,
                        "prefix": "",
                        "progress_bar": true,
                        "suffix": ""
                    }
                },
                {
                    "name": "DocumentJoiner",
                    "type": "haystack.components.joiners.document_joiner.DocumentJoiner",
                    "component_id": "DocumentJoiner_1739380168023",
                    "intro": "DocumentJoiner",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "join_mode": "concatenate",
                        "sort_by_score": true,
                        "top_k": null,
                        "weights": null
                    }
                },
                {
                    "name": "DocumentJoiner-Result",
                    "type": "haystack.components.joiners.document_joiner.DocumentJoiner",
                    "component_id": "DocumentJoiner-Result_1739380168023",
                    "intro": "DocumentJoiner-Result",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "join_mode": "concatenate",
                        "sort_by_score": true,
                        "top_k": null,
                        "weights": null
                    }
                },
                {
                    "name": "DocumentSplitter",
                    "type": "haystack.components.preprocessors.document_splitter.DocumentSplitter",
                    "component_id": "DocumentSplitter_1739380168023",
                    "intro": "DocumentSplitter",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "split_by": "word",
                        "split_length": 200,
                        "split_overlap": 200,
                        "split_threshold": 0
                    }
                },
                {
                    "name": "DocumentWriter",
                    "type": "haystack.components.writers.document_writer.DocumentWriter",
                    "component_id": "DocumentWriter_1739380168023",
                    "intro": "DocumentWriter",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "document_store": {
                            "init_parameters": {
                                "connection_string": {
                                    "env_vars": [
                                        "DATABASE_SYNC_URI"
                                    ],
                                    "strict": true,
                                    "type": "env_var"
                                },
                                "embedding_dimension": 1024,
                                "keyword_index_name": "haystack_keyword_index",
                                "recreate_table": true,
                                "table_name": "embedding_results",
                                "vector_function": "cosine_similarity"
                            },
                            "type": "byoa.integrations.document_stores.mo_document_store.MOIDocumentStore"
                        },
                        "policy": "NONE"
                    }
                },
                {
                    "name": "FileRouterComponent",
                    "type": "haystack.components.routers.file_type_router.FileTypeRouter",
                    "component_id": "FileRouterComponent_1739380168023",
                    "intro": "FileRouterComponent",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "additional_mimetypes": null,
                        "mime_types": [
                            "text/plain",
                            "text/markdown",
                            "image/.*",
                            "application/pdf"
                        ]
                    }
                },
                {
                    "name": "ImageCaptionToDocument",
                    "type": "byoa.integrations.components.converters.image_caption_to_document.ImageCaptionToDocument",
                    "component_id": "ImageCaptionToDocument_1739380168023",
                    "intro": "ImageCaptionToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {}
                },
                {
                    "name": "ImageOCRToDocument",
                    "type": "byoa.integrations.components.converters.image_ocr_to_document.ImageOCRToDocument",
                    "component_id": "ImageOCRToDocument_1739380168023",
                    "intro": "ImageOCRToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "model": "ucaslcl/GOT-OCR2_0",
                        "tokenizer": "stepfun-ai/GOT-OCR2_0"
                    }
                },
                {
                    "name": "ImageToDocument",
                    "type": "byoa.integrations.components.converters.image_to_document.ImageToDocument",
                    "component_id": "ImageToDocument_1739380168023",
                    "intro": "ImageToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {}
                },
                {
                    "name": "MagicPDFToDocument",
                    "type": "byoa.integrations.components.converters.magic_pdf_to_document.MagicPDFToDocument",
                    "component_id": "MagicPDFToDocument_1739380168023",
                    "intro": "MagicPDFToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {}
                },
                {
                    "name": "MarkdownToDocument",
                    "type": "haystack.components.converters.markdown.MarkdownToDocument",
                    "component_id": "MarkdownToDocument_1739380168023",
                    "intro": "MarkdownToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "progress_bar": false,
                        "table_to_single_line": false
                    }
                },
                {
                    "name": "MetadataRouter",
                    "type": "haystack.components.routers.metadata_router.MetadataRouter",
                    "component_id": "MetadataRouter_1739380168023",
                    "intro": "MetadataRouter",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "rules": {
                            "image": {
                                "conditions": [
                                    {
                                        "field": "meta.content_type",
                                        "operator": "==",
                                        "value": "image"
                                    }
                                ],
                                "operator": "AND"
                            },
                            "text": {
                                "conditions": [
                                    {
                                        "field": "meta.content_type",
                                        "operator": "==",
                                        "value": "text"
                                    }
                                ],
                                "operator": "AND"
                            }
                        }
                    }
                },
                {
                    "name": "PythonExecutor",
                    "component_id": "PythonExecutor_1739380168023",
                    "intro": "PythonExecutor",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "python_code": ""
                    },
                    "type": "byoa.integrations.components.python_executor.PythonExecutor"
                },
                {
                    "name": "TextFileToDocument",
                    "type": "haystack.components.converters.txt.TextFileToDocument",
                    "component_id": "TextFileToDocument_1739380168023",
                    "intro": "TextFileToDocument",
                    "position": {
                        "x": 0,
                        "y": 0
                    },
                    "input_keys": {},
                    "output_keys": {},
                    "init_parameters": {
                        "encoding": "utf8"
                    }
                }
            ],
            "connections": [
                {
                    "receiver": "TextFileToDocument.sources",
                    "sender": "FileRouterComponent.text/plain"
                },
                {
                    "receiver": "MarkdownToDocument.sources",
                    "sender": "FileRouterComponent.text/markdown"
                },
                {
                    "receiver": "ImageToDocument.sources",
                    "sender": "FileRouterComponent.image/.*"
                },
                {
                    "receiver": "MagicPDFToDocument.sources",
                    "sender": "FileRouterComponent.application/pdf"
                },
                {
                    "receiver": "DocumentJoiner.documents",
                    "sender": "TextFileToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner.documents",
                    "sender": "MarkdownToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner.documents",
                    "sender": "MagicPDFToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner.documents",
                    "sender": "ImageToDocument.documents"
                },
                {
                    "receiver": "MetadataRouter.documents",
                    "sender": "DocumentJoiner.documents"
                },
                {
                    "receiver": "DocumentCleaner.documents",
                    "sender": "MetadataRouter.text"
                },
                {
                    "receiver": "ImageOCRToDocument.documents",
                    "sender": "MetadataRouter.image"
                },
                {
                    "receiver": "ImageCaptionToDocument.documents",
                    "sender": "MetadataRouter.image"
                },
                {
                    "receiver": "DocumentSplitter.documents",
                    "sender": "DocumentCleaner.documents"
                },
                {
                    "receiver": "DocumentJoiner-Result.documents",
                    "sender": "DocumentSplitter.documents"
                },
                {
                    "receiver": "DocumentCleaner-ImageOCR.documents",
                    "sender": "ImageOCRToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner-Result.documents",
                    "sender": "DocumentCleaner-ImageOCR.documents"
                },
                {
                    "receiver": "DocumentCleaner-ImageCaption.documents",
                    "sender": "ImageCaptionToDocument.documents"
                },
                {
                    "receiver": "DocumentJoiner-Result.documents",
                    "sender": "DocumentCleaner-ImageCaption.documents"
                },
                {
                    "receiver": "PythonExecutor.documents",
                    "sender": "DocumentJoiner-Result.documents"
                },
                {
                    "receiver": "DocumentEmbedder.documents",
                    "sender": "PythonExecutor.documents"
                },
                {
                    "receiver": "DocumentWriter.documents",
                    "sender": "DocumentEmbedder.documents"
                }
            ],
            "edges": [],
            "extra_components": []
        }
    }
}

View the job's file list

GET /byoa/api/v1/index_workflow_job/{job_id}/files

Output parameters:

Parameters Meaning
id File id
file_type File type, 2 is pdf.
file_status File status

Example:

import requests
import json

# File-list endpoint for a single job; the job id precedes the /files suffix.
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow_job/0194f423-c2a7-7cc5-87ce-97fa942ac6ce/files"
headers = {
    "user-id": "0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
    "Access-Token": "xxxx",
    "uid": "a6e11303-f4fd-46c0-b5ff-c774e96f64a3-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin",
}
response = requests.get(url, headers=headers)

# Decode the JSON reply, then pretty-print it with a 4-space indent and
# non-ASCII characters left unescaped.
payload = response.json()
pretty = json.dumps(payload, indent=4, ensure_ascii=False)
print("Response Body:", pretty)

Return:

Response Body: {
    "code": "ok",
    "msg": "ok",
    "data": {
        "files": [
            {
                "id": "0194f423-c2a7-7ccc-a3da-732bafda96a3",
                "file_name": "Dream of the Red Chamber (Public Book) Simplified Chinese horizontal row.pdf",
                "file_type": 2,
                "file_status": 2,
                "error_message": "",
                "start_time": "2025-02-11T08:32:52.000000+0000",
                "end_time": "2025-02-11T08:37:53.000000+0000"
            }
        ],
        "total": 1,
        "completed": 1,
        "failed": 0,
        "processing": 0,
        "pending": 0
    }
}

Reprocessing failed files

POST /byoa/api/v1/index_workflow_job/{job_id}/files

Input parameters:

Parameters Required Meaning
files Yes File id

Example:

import requests
import json

# Reprocess endpoint for a single job; the job id precedes the /files suffix.
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow_job/0194f423-c2a7-7cc5-87ce-97fa942ac6ce/files"

# Headers sent with the request (the values here are placeholders).
headers = {
    "user-id":"xxxx",
    "Access-Token": "xxxx",
    "uid": "011d4b66-ace5-4d58-88a4-bc76719acda5-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
}

# Request payload: "files" is the required list of file ids to reprocess.
body = {
    "files": ["0194f423-c2a7-7ccc-a3da-732bafda96a3"]
}

# The payload must be passed explicitly; json=body serializes it as the JSON
# request body. Without it the required "files" parameter is never sent.
response = requests.post(url, json=body, headers=headers)

# HTTP 200 prints the parsed JSON reply; any other status prints the status
# code and the raw response text.
if response.status_code == 200:
    print(response.json())
else:
    print(f"Request failed, status code: {response.status_code}, error message: {response.text}")

Return:

{'code': 'ok', 'msg': 'ok', 'data': None}