Data processing related API
Workflow
Create a workflow
POST /byoa/api/v1/index_workflow
Input parameters:
| Parameters | Required | Meaning |
|---|---|---|
| name | Yes | Workflow Name |
| source_volume_names | Yes | List of source volume names |
| source_volume_ids | Yes | List of source volume ids |
| target_volume_name | Yes | Target volume name |
| target_volume_id | Yes | Target volume id |
| create_target_volume_name | No | Create new target volume name |
| process_mode | Yes | Processing mode. Values of interval: 0: one-time processing; 1: every 5 minutes; 2: every 10 minutes; 3: every 30 minutes; 4: every 1 hour; 5: every 2 hours; 6: every 4 hours; 7: every 6 hours; 8: every 8 hours; 9: once a day |
| file_types | Yes | List of file types; currently only 2 (PDF) is supported |
| workflow | Yes | Workflow definition. split_length: the maximum length of a segment, minimum 100, maximum 2000. Text preprocessing rules: remove_empty_lines: true removes empty lines; remove_extra_whitespaces: true collapses consecutive spaces, newlines and tabs; remove_regex: a regular expression whose matches are deleted (the example below uses it to delete all URLs and email addresses). |
Example:
import requests
import json
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow"
headers = {
"user-id":"0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
"Access-Token": "xxxx",
"uid": "dea010be-1a50-413a-aa7e-e0611a491cab-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin",
}
body = {
"name":"wf-3",
"source_volume_names":[
"b-vol1"
],
"source_volume_ids":[
"1889223879880048640"
],
"target_volume_name":"a-vol1",
"target_volume_id":"eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
"create_target_volume_name":"",
"process_mode":{
"interval":0,
"offset":0
},
"file_types":[
2
],
"workflow":{
"components":[
{
"name":"DocumentCleaner",
"type":"haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id":"DocumentCleaner_1739377283742",
"intro":"DocumentCleaner",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"ascii_only":"false",
"keep_id":"false",
"remove_empty_lines":"true",
"remove_extra_whitespaces":"true",
"remove_regex":"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
"remove_repeated_substrings":"false",
"remove_substrings":"null",
"unicode_normalization":"null"
}
},
{
"name":"DocumentCleaner-ImageCaption",
"type":"haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id":"DocumentCleaner-ImageCaption_1739377283742",
"intro":"DocumentCleaner-ImageCaption",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"ascii_only":"false",
"keep_id":"false",
"remove_empty_lines":"true",
"remove_extra_whitespaces":"true",
"remove_regex":"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
"remove_repeated_substrings":"false",
"remove_substrings":"null",
"unicode_normalization":"null"
}
},
{
"name":"DocumentCleaner-ImageOCR",
"type":"haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id":"DocumentCleaner-ImageOCR_1739377283742",
"intro":"DocumentCleaner-ImageOCR",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"ascii_only":"false",
"keep_id":"false",
"remove_empty_lines":"true",
"remove_extra_whitespaces":"true",
"remove_regex":"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
"remove_repeated_substrings":"false",
"remove_substrings":"null",
"unicode_normalization":"null"
}
},
{
"name":"DocumentEmbedder",
"type":"haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder","component_id":"DocumentEmbedder_1739377283742",
"intro":"DocumentEmbedder",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"api_base_url":"https://api.siliconflow.cn/v1",
"api_key":{
"env_vars":[
"OPENAI_API_KEY"
],
"strict":"true",
"type":"env_var"
},
"batch_size":32,
"dimensions":"null",
"embedding_separator":"\n",
"meta_fields_to_embed":[
],
"model":"BAAI/bge-m3",
"organization":"null",
"prefix":"",
"progress_bar":"true",
"suffix":""
}
},
{
"name":"DocumentJoiner",
"type":"haystack.components.joiners.document_joiner.DocumentJoiner",
"component_id":"DocumentJoiner_1739377283742",
"intro":"DocumentJoiner",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"join_mode":"concatenate",
"sort_by_score":"true",
"top_k":"null",
"weights":"null"
}
},
{
"name":"DocumentJoiner-Result",
"type":"haystack.components.joiners.document_joiner.DocumentJoiner",
"component_id":"DocumentJoiner-Result_1739377283742",
"intro":"DocumentJoiner-Result",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"join_mode":"concatenate",
"sort_by_score":"true",
"top_k":"null",
"weights":"null"
}
},
{
"name":"DocumentSplitter",
"type":"haystack.components.preprocessors.document_splitter.DocumentSplitter",
"component_id":"DocumentSplitter_1739377283742",
"intro":"DocumentSplitter",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"split_by":"word",
"split_length":800,
"split_overlap":200,
"split_threshold":0
}
},
{
"name":"DocumentWriter",
"type":"haystack.components.writers.document_writer.DocumentWriter",
"component_id":"DocumentWriter_1739377283742",
"intro":"DocumentWriter",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"document_store":{
"init_parameters":{
"connection_string":{
"env_vars":[
"DATABASE_SYNC_URI"
],
"strict":"true",
"type":"env_var"
},
"embedding_dimension":1024,
"keyword_index_name":"haystack_keyword_index",
"recreate_table":"true",
"table_name":"embedding_results",
"vector_function":"cosine_similarity"
},
"type":"byoa.integrations.document_stores.mo_document_store.MOIDocumentStore"
},
"policy":"NONE"
}
},
{
"name":"FileRouterComponent",
"type":"haystack.components.routers.file_type_router.FileTypeRouter",
"component_id":"FileRouterComponent_1739377283742","intro":"FileRouterComponent",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"additional_mimetypes":"null",
"mime_types":[
"text/plain",
"text/markdown",
"image/.*",
"application/pdf"
]
}
},
{
"name":"ImageCaptionToDocument",
"type":"byoa.integrations.components.converters.image_caption_to_document.ImageCaptionToDocument",
"component_id":"ImageCaptionToDocument_1739377283742",
"intro":"ImageCaptionToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
}
},
{
"name":"ImageOCRToDocument",
"type":"byoa.integrations.components.converters.image_ocr_to_document.ImageOCRToDocument",
"component_id":"ImageOCRToDocument_1739377283742",
"intro":"ImageOCRToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"model":"ucaslcl/GOT-OCR2_0",
"tokenizer":"stepfun-ai/GOT-OCR2_0"
}
},
{
"name":"ImageToDocument",
"type":"byoa.integrations.components.converters.image_to_document.ImageToDocument",
"component_id":"ImageToDocument_1739377283742",
"intro":"ImageToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
}
},
{
"name":"MagicPDFToDocument",
"type":"byoa.integrations.components.converters.magic_pdf_to_document.MagicPDFToDocument",
"component_id":"MagicPDFToDocument_1739377283742",
"intro":"MagicPDFToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
}
},
{
"name":"MarkdownToDocument",
"type":"haystack.components.converters.markdown.MarkdownToDocument",
"component_id":"MarkdownToDocument_1739377283742",
"intro":"MarkdownToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"progress_bar":"false",
"table_to_single_line":"false"
}
},
{
"name":"MetadataRouter",
"type":"haystack.components.routers.metadata_router.MetadataRouter",
"component_id":"MetadataRouter_1739377283742",
"intro":"MetadataRouter",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"rules":{
"image":{
"conditions":[
{
"field":"meta.content_type",
"operator":"==",
"value":"image"
}
],
"operator":"AND"
},
"text":{
"conditions":[
{
"field":"meta.content_type",
"operator":"==",
"value":"text"
}],
"operator":"AND"
}
}
}
},
{
"name":"PythonExecutor",
"component_id":"PythonExecutor_1739377283742",
"intro":"PythonExecutor",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"python_code":""
},
"type":"byoa.integrations.components.python_executor.PythonExecutor"
},
{
"name":"TextFileToDocument",
"type":"haystack.components.converters.txt.TextFileToDocument",
"component_id":"TextFileToDocument_1739377283742",
"intro":"TextFileToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"encoding":"utf8"
}
}
],
"connections":[
{
"receiver":"TextFileToDocument.sources",
"sender":"FileRouterComponent.text/plain"
},
{
"receiver":"MarkdownToDocument.sources",
"sender":"FileRouterComponent.text/markdown"
},
{
"receiver":"ImageToDocument.sources",
"sender":"FileRouterComponent.image/.*"
},
{
"receiver":"MagicPDFToDocument.sources",
"sender":"FileRouterComponent.application/pdf"
},
{
"receiver":"DocumentJoiner.documents",
"sender":"TextFileToDocument.documents"
},
{
"receiver":"DocumentJoiner.documents",
"sender":"MarkdownToDocument.documents"
},
{
"receiver":"DocumentJoiner.documents",
"sender":"MagicPDFToDocument.documents"
},
{
"receiver":"DocumentJoiner.documents",
"sender":"ImageToDocument.documents"
},
{
"receiver":"MetadataRouter.documents",
"sender":"DocumentJoiner.documents"
},
{
"receiver":"DocumentCleaner.documents",
"sender":"MetadataRouter.text"
},
{
"receiver":"ImageOCRToDocument.documents",
"sender":"MetadataRouter.image"
},
{
"receiver":"ImageCaptionToDocument.documents",
"sender":"MetadataRouter.image"
},
{
"receiver":"DocumentSplitter.documents",
"sender":"DocumentCleaner.documents"
},
{
"receiver":"DocumentJoiner-Result.documents",
"sender":"DocumentSplitter.documents"
},
{
"receiver":"DocumentCleaner-ImageOCR.documents",
"sender":"ImageOCRToDocument.documents"
},
{
"receiver":"DocumentJoiner-Result.documents",
"sender":"DocumentCleaner-ImageOCR.documents"
},
{
"receiver":"DocumentCleaner-ImageCaption.documents",
"sender":"ImageCaptionToDocument.documents"
},
{
"receiver":"DocumentJoiner-Result.documents",
"sender":"DocumentCleaner-ImageCaption.documents"
},
{
"receiver":"PythonExecutor.documents",
"sender":"DocumentJoiner-Result.documents"
},
{
"receiver":"DocumentEmbedder.documents",
"sender":"PythonExecutor.documents"
},
{
"receiver":"DocumentWriter.documents",
"sender":"DocumentEmbedder.documents"
}
],
"edges":[
],
"extra_components":[
]
}
}
response = requests.post(url, json=body, headers=headers)
print(response.json())
Return:
{'code': 'ok', 'msg': 'ok', 'data': None}
View workflow list
GET /byoa/api/v1/index_workflow
Example:
import requests
import json
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow"
headers = {"user-id":"0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
"Access-Token": "xxxx",
"uid": "d252447b-7f1d-4fd4-8b70-9bc2dd5cd505-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
}
response = requests.get(url, headers=headers)
print("Response Body:", json.dumps(response.json(), indent=4, ensure_ascii=False))
Return:
Response Body: {
"code": "ok",
"msg": "ok",
"data": {
"total": 1,
"workflows": [
{
"id": "a029a904-1e1c-41af-b361-b6578c92a437",
"job_meta_id": "53d99056-b7b8-4d9e-baf1-3d3bca24d4f3",
"name": "wf-1",
"source_volume_ids": [
"1889223879880048640"
],
"source_volume_names": [
"b-vol1"
],
"file_types": [
2
],
"created_at": 1739377287000,
"creator": "admin",
"updated_at": 1739377755000,
"modifier": "admin",
"target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
"target_volume_name": "a-vol1",
"process_mode": {
"interval": 0,
"offset": 0
},
"status": 2
}
]
}
}
View workflow details
GET /byoa/api/v1/index_workflow/{workflow_id}
Output parameters:
| Parameter | Meaning |
|---|---|
| id | Workflow id |
| name | Workflow name |
| job_meta_id | Job metadata id |
| source_volume_ids | List of source volume ids |
| source_volume_names | List of source volume names |
| file_types | File type list |
| created_at | Created at |
| creator | Creator |
| updated_at | Updated at |
| modifier | Updater |
| target_volume_id | Target volume id |
| target_volume_name | Target volume name |
| process_mode | Processing mode |
| status | Status 1: Running; 2: Completed; 3: Stopped |
| workflow | Workflow |
Example:
import requests
import json
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow/ff5d119a-4e94-4968-ac0c-6ef64fcabb6c"
headers = {
"user-id":"0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
"Access-Token": "xxxx",
"uid": "181c0bfb-486f-4e55-a4ea-7fa2a5dae4fa-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
}
response = requests.get(url, headers=headers)
print("Response Body:", json.dumps(response.json(), indent=4, ensure_ascii=False))
Return:
Response Body: {
"code": "ok",
"msg": "ok",
"data": {
"id": "ff5d119a-4e94-4968-ac0c-6ef64fcabb6c",
"name": "test-2",
"job_meta_id": "edd6ffc3-5c96-4a1b-a6ef-01d21fdbb6d0",
"source_volume_ids": [
"1889223879880048640"
],
"source_volume_names": [
"b-vol1"
],
"file_types": [
2
],
"created_at": 1739435482000,
"creator": "admin",
"updated_at": 1739436347000,
"modifier": "admin",
"target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
"target_volume_name": "a-vol2",
"process_mode": {
"interval": 5,
"offset": 0
},
"status": 1,
"workflow": {
"components": [
{
"name": "DocumentCleaner",
"type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id": "DocumentCleaner_1739435478121",
"intro": "DocumentCleaner",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"ascii_only": false,
"keep_id": false,
"remove_empty_lines": false,
"remove_extra_whitespaces": false,
"remove_regex": null,
"remove_repeated_substrings": false,
"remove_substrings": null,
"unicode_normalization": null
}
},
{
"name": "DocumentCleaner-ImageCaption",
"type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id": "DocumentCleaner-ImageCaption_1739435478121",
"intro": "DocumentCleaner-ImageCaption",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},"output_keys": {},
"init_parameters": {
"ascii_only": false,
"keep_id": false,
"remove_empty_lines": false,
"remove_extra_whitespaces": false,
"remove_regex": null,
"remove_repeated_substrings": false,
"remove_substrings": null,
"unicode_normalization": null
}
},
{
"name": "DocumentCleaner-ImageOCR",
"type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id": "DocumentCleaner-ImageOCR_1739435478121",
"intro": "DocumentCleaner-ImageOCR",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"ascii_only": false,
"keep_id": false,
"remove_empty_lines": false,
"remove_extra_whitespaces": false,
"remove_regex": null,
"remove_repeated_substrings": false,
"remove_substrings": null,
"unicode_normalization": null
}
},
{
"name": "DocumentEmbedder",
"type": "haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder",
"component_id": "DocumentEmbedder_1739435478121",
"intro": "DocumentEmbedder",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"api_base_url": "https://api.siliconflow.cn/v1",
"api_key": {
"env_vars": [
"OPENAI_API_KEY"
],
"strict": true,
"type": "env_var"
},
"batch_size": 32,
"dimensions": null,
"embedding_separator": "\n",
"meta_fields_to_embed": [],
"model": "BAAI/bge-m3",
"organization": null,
"prefix": "",
"progress_bar": true,
"suffix": ""
}
},
{
"name": "DocumentJoiner",
"type": "haystack.components.joiners.document_joiner.DocumentJoiner",
"component_id": "DocumentJoiner_1739435478121",
"intro": "DocumentJoiner",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"join_mode": "concatenate",
"sort_by_score": true,
"top_k": null,
"weights": null
}
},
{
"name": "DocumentJoiner-Result",
"type": "haystack.components.joiners.document_joiner.DocumentJoiner",
"component_id": "DocumentJoiner-Result_1739435478121",
"intro": "DocumentJoiner-Result",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"join_mode": "concatenate",
"sort_by_score": true,
"top_k": null,
"weights": null
}
},
{
"name": "DocumentSplitter",
"type": "haystack.components.preprocessors.document_splitter.DocumentSplitter",
"component_id": "DocumentSplitter_1739435478121",
"intro": "DocumentSplitter",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},"output_keys": {},
"init_parameters": {
"split_by": "word",
"split_length": 800,
"split_overlap": 200,
"split_threshold": 0
}
},
{
"name": "DocumentWriter",
"type": "haystack.components.writers.document_writer.DocumentWriter",
"component_id": "DocumentWriter_1739435478121",
"intro": "DocumentWriter",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"document_store": {
"init_parameters": {
"connection_string": {
"env_vars": [
"DATABASE_SYNC_URI"
],
"strict": true,
"type": "env_var"
},
"embedding_dimension": 1024,
"keyword_index_name": "haystack_keyword_index",
"recreate_table": true,
"table_name": "embedding_results",
"vector_function": "cosine_similarity"
},
"type": "byoa.integrations.document_stores.mo_document_store.MOIDocumentStore"
},
"policy": "NONE"
}
},
{
"name": "FileRouterComponent",
"type": "haystack.components.routers.file_type_router.FileTypeRouter",
"component_id": "FileRouterComponent_1739435478121",
"intro": "FileRouterComponent",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"additional_mimetypes": null,
"mime_types": [
"text/plain",
"text/markdown",
"image/.*",
"application/pdf"
]
}
},
{
"name": "ImageCaptionToDocument",
"type": "byoa.integrations.components.converters.image_caption_to_document.ImageCaptionToDocument",
"component_id": "ImageCaptionToDocument_1739435478121",
"intro": "ImageCaptionToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {}
},
{
"name": "ImageOCRToDocument",
"type": "byoa.integrations.components.converters.image_ocr_to_document.ImageOCRToDocument",
"component_id": "ImageOCRToDocument_1739435478121",
"intro": "ImageOCRToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"model": "ucaslcl/GOT-OCR2_0",
"tokenizer": "stepfun-ai/GOT-OCR2_0"
}
},
{
"name": "ImageToDocument",
"type": "byoa.integrations.components.converters.image_to_document.ImageToDocument",
"component_id": "ImageToDocument_1739435478121",
"intro": "ImageToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {}
},
{
"name": "MagicPDFToDocument",
"type": "byoa.integrations.components.converters.magic_pdf_to_document.MagicPDFToDocument",
"component_id": "MagicPDFToDocument_1739435478121","intro": "MagicPDFToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {}
},
{
"name": "MarkdownToDocument",
"type": "haystack.components.converters.markdown.MarkdownToDocument",
"component_id": "MarkdownToDocument_1739435478121",
"intro": "MarkdownToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"progress_bar": false,
"table_to_single_line": false
}
},
{
"name": "MetadataRouter",
"type": "haystack.components.routers.metadata_router.MetadataRouter",
"component_id": "MetadataRouter_1739435478121",
"intro": "MetadataRouter",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"rules": {
"image": {
"conditions": [
{
"field": "meta.content_type",
"operator": "==",
"value": "image"
}
],
"operator": "AND"
},
"text": {
"conditions": [
{
"field": "meta.content_type",
"operator": "==",
"value": "text"
}
],
"operator": "AND"
}
}
}
},
{
"name": "PythonExecutor",
"component_id": "PythonExecutor_1739435478121",
"intro": "PythonExecutor",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"python_code": ""
},
"type": "byoa.integrations.components.python_executor.PythonExecutor"
},
{
"name": "TextFileToDocument",
"type": "haystack.components.converters.txt.TextFileToDocument",
"component_id": "TextFileToDocument_1739435478121",
"intro": "TextFileToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"encoding": "utf8"
}
}
],
"connections": [
{
"receiver": "TextFileToDocument.sources",
"sender": "FileRouterComponent.text/plain"
},
{
"receiver": "MarkdownToDocument.sources",
"sender": "FileRouterComponent.text/markdown"
},
{
"receiver": "ImageToDocument.sources",
"sender": "FileRouterComponent.image/.*"
},
{
"receiver": "MagicPDFToDocument.sources",
"sender": "FileRouterComponent.application/pdf"
},
{
"receiver": "DocumentJoiner.documents",
"sender": "TextFileToDocument.documents"
},
{
"receiver": "DocumentJoiner.documents",
"sender": "MarkdownToDocument.documents"
},
{
"receiver": "DocumentJoiner.documents","sender": "MagicPDFToDocument.documents"
},
{
"receiver": "DocumentJoiner.documents",
"sender": "ImageToDocument.documents"
},
{
"receiver": "MetadataRouter.documents",
"sender": "DocumentJoiner.documents"
},
{
"receiver": "DocumentCleaner.documents",
"sender": "MetadataRouter.text"
},
{
"receiver": "ImageOCRToDocument.documents",
"sender": "MetadataRouter.image"
},
{
"receiver": "ImageCaptionToDocument.documents",
"sender": "MetadataRouter.image"
},
{
"receiver": "DocumentSplitter.documents",
"sender": "DocumentCleaner.documents"
},
{
"receiver": "DocumentJoiner-Result.documents",
"sender": "DocumentSplitter.documents"
},
{
"receiver": "DocumentCleaner-ImageOCR.documents",
"sender": "ImageOCRToDocument.documents"
},
{
"receiver": "DocumentJoiner-Result.documents",
"sender": "DocumentCleaner-ImageOCR.documents"
},
{
"receiver": "DocumentCleaner-ImageCaption.documents",
"sender": "ImageCaptionToDocument.documents"
},
{
"receiver": "DocumentJoiner-Result.documents",
"sender": "DocumentCleaner-ImageCaption.documents"
},
{
"receiver": "PythonExecutor.documents",
"sender": "DocumentJoiner-Result.documents"
},
{
"receiver": "DocumentEmbedder.documents",
"sender": "PythonExecutor.documents"
},
{
"receiver": "DocumentWriter.documents",
"sender": "DocumentEmbedder.documents"
}
],
"edges": [],
"extra_components": []
}
}
}
Modify Workflow
PUT /byoa/api/v1/index_workflow/{workflow_id}
For input parameters, refer to the "Create a workflow" section above.
Example:
import requests
import json
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow/fef28ca2-175e-4de9-9ac3-f4aa0da5a745"
headers = {
"user-id":"0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
"Access-Token": "xxxx",
"uid": "fa9f114e-77e0-4c23-aa0f-e982a5ec80e2-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
}
body = {
"name":"wf-3",
"source_volume_names":[
"b-vol1"
],
"source_volume_ids":[
"1889223879880048640"
],
"target_volume_name":"a-vol1",
"target_volume_id":"eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
"create_target_volume_name":"",
"process_mode":{
"interval":0,
"offset":0
},
"file_types":[
2
],
"workflow":{
"components":[
{
"name":"DocumentCleaner",
"type":"haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id":"DocumentCleaner_1739377283742",
"intro":"DocumentCleaner",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"ascii_only":"false",
"keep_id":"false",
"remove_empty_lines":"true",
"remove_extra_whitespaces":"true",
"remove_regex":"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
"remove_repeated_substrings":"false",
"remove_substrings":"null",
"unicode_normalization":"null"
}
},
{
"name":"DocumentCleaner-ImageCaption",
"type":"haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id":"DocumentCleaner-ImageCaption_1739377283742",
"intro":"DocumentCleaner-ImageCaption",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{},
"init_parameters":{
"ascii_only":"false",
"keep_id":"false",
"remove_empty_lines":"true",
"remove_extra_whitespaces":"true",
"remove_regex":"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
"remove_repeated_substrings":"false",
"remove_substrings":"null",
"unicode_normalization":"null"
}
},
{
"name":"DocumentCleaner-ImageOCR",
"type":"haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id":"DocumentCleaner-ImageOCR_1739377283742",
"intro":"DocumentCleaner-ImageOCR",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"ascii_only":"false",
"keep_id":"false",
"remove_empty_lines":"true",
"remove_extra_whitespaces":"true",
"remove_regex":"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)|https?://[^\\s]+",
"remove_repeated_substrings":"false",
"remove_substrings":"null",
"unicode_normalization":"null"
}
},
{
"name":"DocumentEmbedder",
"type":"haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder",
"component_id":"DocumentEmbedder_1739377283742",
"intro":"DocumentEmbedder",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"api_base_url":"https://api.siliconflow.cn/v1",
"api_key":{
"env_vars":[
"OPENAI_API_KEY"
],
"strict":"true",
"type":"env_var"
},
"batch_size":32,
"dimensions":"null",
"embedding_separator":"\n",
"meta_fields_to_embed":[
],
"model":"BAAI/bge-m3",
"organization":"null",
"prefix":"",
"progress_bar":"true",
"suffix":""
}
},
{
"name":"DocumentJoiner",
"type":"haystack.components.joiners.document_joiner.DocumentJoiner",
"component_id":"DocumentJoiner_1739377283742",
"intro":"DocumentJoiner",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"join_mode":"concatenate",
"sort_by_score":"true",
"top_k":"null",
"weights":"null"
}
},
{
"name":"DocumentJoiner-Result",
"type":"haystack.components.joiners.document_joiner.DocumentJoiner",
"component_id":"DocumentJoiner-Result_1739377283742",
"intro":"DocumentJoiner-Result",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"join_mode":"concatenate",
"sort_by_score":"true",
"top_k":"null",
"weights":"null"
}
},
{
"name":"DocumentSplitter",
"type":"haystack.components.preprocessors.document_splitter.DocumentSplitter",
"component_id":"DocumentSplitter_1739377283742",
"intro":"DocumentSplitter",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"split_by":"word",
"split_length":800,
"split_overlap":200,"split_threshold":0
}
},
{
"name":"DocumentWriter",
"type":"haystack.components.writers.document_writer.DocumentWriter",
"component_id":"DocumentWriter_1739377283742",
"intro":"DocumentWriter",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"document_store":{
"init_parameters":{
"connection_string":{
"env_vars":[
"DATABASE_SYNC_URI"
],
"strict":"true",
"type":"env_var"
},
"embedding_dimension":1024,
"keyword_index_name":"haystack_keyword_index",
"recreate_table":"true",
"table_name":"embedding_results",
"vector_function":"cosine_similarity"
},
"type":"byoa.integrations.document_stores.mo_document_store.MOIDocumentStore"
},
"policy":"NONE"
}
},
{
"name":"FileRouterComponent",
"type":"haystack.components.routers.file_type_router.FileTypeRouter",
"component_id":"FileRouterComponent_1739377283742",
"intro":"FileRouterComponent",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"additional_mimetypes":"null",
"mime_types":[
"text/plain",
"text/markdown",
"image/.*",
"application/pdf"
]
}
},
{
"name":"ImageCaptionToDocument",
"type":"byoa.integrations.components.converters.image_caption_to_document.ImageCaptionToDocument",
"component_id":"ImageCaptionToDocument_1739377283742",
"intro":"ImageCaptionToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
}
},
{
"name":"ImageOCRToDocument",
"type":"byoa.integrations.components.converters.image_ocr_to_document.ImageOCRToDocument",
"component_id":"ImageOCRToDocument_1739377283742",
"intro":"ImageOCRToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"model":"ucaslcl/GOT-OCR2_0",
"tokenizer":"stepfun-ai/GOT-OCR2_0"
}
},
{
"name":"ImageToDocument",
"type":"byoa.integrations.components.converters.image_to_document.ImageToDocument",
"component_id":"ImageToDocument_1739377283742",
"intro":"ImageToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
}
},
{
"name":"MagicPDFToDocument",
"type":"byoa.integrations.components.converters.magic_pdf_to_document.MagicPDFToDocument",
"component_id":"MagicPDFToDocument_1739377283742",
"intro":"MagicPDFToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
}
},
{
"name":"MarkdownToDocument",
"type":"haystack.components.converters.markdown.MarkdownToDocument","component_id":"MarkdownToDocument_1739377283742",
"intro":"MarkdownToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"progress_bar":"false",
"table_to_single_line":"false"
}
},
{
"name":"MetadataRouter",
"type":"haystack.components.routers.metadata_router.MetadataRouter",
"component_id":"MetadataRouter_1739377283742",
"intro":"MetadataRouter",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"rules":{
"image":{
"conditions":[
{
"field":"meta.content_type",
"operator":"==",
"value":"image"
}
],
"operator":"AND"
},
"text":{
"conditions":[
{
"field":"meta.content_type",
"operator":"==",
"value":"text"
}
],
"operator":"AND"
}
}
}
},
{
"name":"PythonExecutor",
"component_id":"PythonExecutor_1739377283742",
"intro":"PythonExecutor",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"python_code":""
},
"type":"byoa.integrations.components.python_executor.PythonExecutor"
},
{
"name":"TextFileToDocument",
"type":"haystack.components.converters.txt.TextFileToDocument",
"component_id":"TextFileToDocument_1739377283742",
"intro":"TextFileToDocument",
"position":{
"x":0,
"y":0
},
"input_keys":{
},
"output_keys":{
},
"init_parameters":{
"encoding":"utf8"
}
}
],
"connections":[
{
"receiver":"TextFileToDocument.sources",
"sender":"FileRouterComponent.text/plain"
},
{
"receiver":"MarkdownToDocument.sources",
"sender":"FileRouterComponent.text/markdown"
},
{
"receiver":"ImageToDocument.sources",
"sender":"FileRouterComponent.image/.*"
},
{
"receiver":"MagicPDFToDocument.sources",
"sender":"FileRouterComponent.application/pdf"
},
{
"receiver":"DocumentJoiner.documents",
"sender":"TextFileToDocument.documents"
},
{
"receiver":"DocumentJoiner.documents",
"sender":"MarkdownToDocument.documents"
},
{
"receiver":"DocumentJoiner.documents",
"sender":"MagicPDFToDocument.documents"
},
{
"receiver":"DocumentJoiner.documents",
"sender":"ImageToDocument.documents"
},
{
"receiver":"MetadataRouter.documents",
"sender":"DocumentJoiner.documents"
},
{
"receiver":"DocumentCleaner.documents",
"sender":"MetadataRouter.text"
},
{
"receiver":"ImageOCRToDocument.documents",
"sender":"MetadataRouter.image"
},
{
"receiver":"ImageCaptionToDocument.documents",
"sender":"MetadataRouter.image"
},
{
"receiver":"DocumentSplitter.documents","sender":"DocumentCleaner.documents"
},
{
"receiver":"DocumentJoiner-Result.documents",
"sender":"DocumentSplitter.documents"
},
{
"receiver":"DocumentCleaner-ImageOCR.documents",
"sender":"ImageOCRToDocument.documents"
},
{
"receiver":"DocumentJoiner-Result.documents",
"sender":"DocumentCleaner-ImageOCR.documents"
},
{
"receiver":"DocumentCleaner-ImageCaption.documents",
"sender":"ImageCaptionToDocument.documents"
},
{
"receiver":"DocumentJoiner-Result.documents",
"sender":"DocumentCleaner-ImageCaption.documents"
},
{
"receiver":"PythonExecutor.documents",
"sender":"DocumentJoiner-Result.documents"
},
{
"receiver":"DocumentEmbedder.documents",
"sender":"PythonExecutor.documents"
},
{
"receiver":"DocumentWriter.documents",
"sender":"DocumentEmbedder.documents"
}
],
"edges":[
],
"extra_components":[
]
}
}
response = requests.put(url, json=body, headers=headers)
print(response.json())
Return:
{'code': 'ok', 'msg': 'ok', 'data': None}
Delete Workflow
DELETE /byoa/api/v1/index_workflow/{workflow_id}?[delete_data=true]
Example:
import requests
import json
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow/729e7a03-652d-46e0-bdad-b05ec5b80cea?delete_data=true"
headers = {
"user-id":"0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
"Access-Token": "xxxx",
"uid": "011d4b66-ace5-4d58-88a4-bc76719acda5-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
}
response = requests.delete(url, headers=headers)
if response.status_code == 200:
print(response.json())
else:
print(f"请求失败,状态码:{response.status_code}, 错误信息:{response.text}")
Return:
{'code': 'ok', 'msg': 'ok', 'data': None}
Job
View the job list
GET /byoa/api/v1/index_workflow_job
Output parameters:
| Parameter | Meaning |
|---|---|
| id | Job id |
| workflow_name | Workflow name |
| name | Connector name |
| source_volume_names | Original volume name |
| source_volume_ids | Original volume id |
| target_volume_name | Target volume name |
| target_volume_id | Target volume id |
| file_types | File type, 2 is pdf format |
| status | Workflow status |
| workflow_id | Workflow id |
Example:
import requests
import json
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow_job"
headers = {
"user-id":"0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
"Access-Token": "xxxx",
"uid": "011d4b66-ace5-4d58-88a4-bc76719acda5-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
}
response = requests.get(url, headers=headers)
print("Response Body:", json.dumps(response.json(), indent=4, ensure_ascii=False))
Return:
Response Body: {
"code": "ok",
"msg": "ok",
"data": {
"total": 10,
"jobs": [
{
"id": "0194fb2c-f5c5-7d42-8d09-fdc3d8414777",
"workflow_name": "wf-2",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol2",
"target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
"file_types": [
2
],
"start_time": "2025-02-12T17:20:15.000000+0000",
"end_time": "2025-02-12T17:20:15.000000+0000",
"duration": 0,
"processed_count": 0,
"total_count": 0,
"status": 2,
"workflow_id": "729e7a03-652d-46e0-bdad-b05ec5b80cea",
"workflow": {
"components": null,
"connections": null,
"edges": null,
"extra_components": null
}
},
{
"id": "0194fb28-61ae-7aac-8500-a4c924a68211",
"workflow_name": "wf-4",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol2","target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
"file_types": [
2
],
"start_time": "2025-02-12T17:15:15.000000+0000",
"end_time": "2025-02-12T17:21:17.000000+0000",
"duration": 362,
"processed_count": 1,
"total_count": 1,
"status": 2,
"workflow_id": "c6dcbad5-f85d-42b7-942c-2e8d3445a4e6",
"workflow": {
"components": null,
"connections": null,
"edges": null,
"extra_components": null
}
},
{
"id": "0194fb28-61af-708a-915e-6f140a2424fe",
"workflow_name": "wf-2",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol2",
"target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
"file_types": [
2
],
"start_time": "2025-02-12T17:15:15.000000+0000",
"end_time": "2025-02-12T17:15:15.000000+0000",
"duration": 0,
"processed_count": 0,
"total_count": 0,
"status": 2,
"workflow_id": "729e7a03-652d-46e0-bdad-b05ec5b80cea",
"workflow": {
"components": null,
"connections": null,
"edges": null,
"extra_components": null
}
},
{
"id": "0194fb23-cd65-767c-b58f-db7c4456b896",
"workflow_name": "wf-2",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol2",
"target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
"file_types": [
2
],
"start_time": "2025-02-12T17:10:15.000000+0000",
"end_time": "2025-02-12T17:16:17.000000+0000",
"duration": 362,
"processed_count": 1,
"total_count": 1,
"status": 2,
"workflow_id": "729e7a03-652d-46e0-bdad-b05ec5b80cea",
"workflow": {
"components": null,
"connections": null,
"edges": null,
"extra_components": null
}
},
{
"id": "0194fb06-8044-70e4-8a54-16f5a0e3c720",
"workflow_name": "wf-3",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol1",
"target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
"file_types": [
2
],
"start_time": "2025-02-12T16:38:15.000000+0000",
"end_time": "2025-02-12T16:39:16.000000+0000",
"duration": 61,
"processed_count": 1,
"total_count": 1,
"status": 3,
"workflow_id": "2c0be55b-af55-4787-baac-3d8e7d987fe7",
"workflow": {
"components": null,
"connections": null,
"edges": null,
"extra_components": null
}
},
{
"id": "0194faf7-d9a0-7347-8726-a86f52cf67c7",
"workflow_name": "wf-1",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol1",
"target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
"file_types": [
2
],
"start_time": "2025-02-12T16:22:15.000000+0000",
"end_time": "2025-02-12T16:28:16.000000+0000",
"duration": 361,
"processed_count": 1,
"total_count": 1,
"status": 2,
"workflow_id": "a029a904-1e1c-41af-b361-b6578c92a437",
"workflow": {
"components": null,"connections": null,
"edges": null,
"extra_components": null
}
},
{
"id": "0194f423-c2a7-7cc5-87ce-97fa942ac6ce",
"workflow_name": "wk-3",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol1",
"target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
"file_types": [
2
],
"start_time": "2025-02-11T08:32:52.000000+0000",
"end_time": "2025-02-11T08:38:52.000000+0000",
"duration": 360,
"processed_count": 1,
"total_count": 1,
"status": 2,
"workflow_id": "4f209aa9-186c-442a-b324-d7eebaca4cd0",
"workflow": {
"components": null,
"connections": null,
"edges": null,
"extra_components": null
}
},
{
"id": "0194f41d-59d3-7899-9fa8-24343214df7f",
"workflow_name": "wf-3",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol1",
"target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
"file_types": [
2
],
"start_time": "2025-02-11T08:25:52.000000+0000",
"end_time": "2025-02-11T08:31:52.000000+0000",
"duration": 360,
"processed_count": 1,
"total_count": 1,
"status": 2,
"workflow_id": "ea64f8ba-b984-46a3-acb0-628849538244",
"workflow": {
"components": null,
"connections": null,
"edges": null,
"extra_components": null
}
},
{
"id": "0194f40f-9da9-7927-890c-4bea252e0235",
"workflow_name": "wf-2",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol1",
"target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
"file_types": [
2
],
"start_time": "2025-02-11T08:10:52.000000+0000",
"end_time": "2025-02-11T08:10:52.000000+0000",
"duration": 0,
"processed_count": 0,
"total_count": 0,
"status": 2,
"workflow_id": "d2842368-37dc-4b49-930a-25f16a8fc0c8",
"workflow": {
"components": null,
"connections": null,
"edges": null,
"extra_components": null
}
},
{
"id": "0194f40c-deb5-7466-bd7f-7c930a034bcd",
"workflow_name": "wf-1",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol1",
"target_volume_id": "eb42f0a1-ab18-4010-b95c-cd1716dd5e95",
"file_types": [
2
],
"start_time": "2025-02-11T08:07:52.000000+0000",
"end_time": "2025-02-11T08:07:52.000000+0000",
"duration": 0,
"processed_count": 0,
"total_count": 0,
"status": 2,
"workflow_id": "f6c0b040-5403-42b9-a914-bbf2935d69f0",
"workflow": {
"components": null,
"connections": null,
"edges": null,
"extra_components": null
}
}
]
}
}
View job details
GET /byoa/api/v1/index_workflow_job/{job_id}
Example:
import requests
import json
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow_job/0194fb2c-f5c5-7d42-8d09-fdc3d8414777"
headers = {
"user-id":"0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
"Access-Token": "xxxx","uid": "a6e11303-f4fd-46c0-b5ff-c774e96f64a3-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
}
response = requests.get(url, headers=headers)
print("Response Body:", json.dumps(response.json(), indent=4, ensure_ascii=False))
Return:
Response Body: {
"code": "ok",
"msg": "ok",
"data": {
"id": "0194fb2c-f5c5-7d42-8d09-fdc3d8414777",
"workflow_name": "wf-2",
"source_volume_names": [
"b-vol1"
],
"source_volume_ids": [
"1889223879880048640"
],
"target_volume_name": "a-vol2",
"target_volume_id": "dbcc0d71-31f9-4799-b404-096f9e8e57f9",
"file_types": [
2
],
"start_time": "2025-02-12T17:20:15.000000+0000",
"end_time": "2025-02-12T17:20:15.000000+0000",
"duration": 0,
"processed_count": 0,
"total_count": 0,
"status": 2,
"workflow_id": "729e7a03-652d-46e0-bdad-b05ec5b80cea",
"workflow": {
"components": [
{
"name": "DocumentCleaner",
"type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id": "DocumentCleaner_1739380168023",
"intro": "DocumentCleaner",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"ascii_only": false,
"keep_id": false,
"remove_empty_lines": true,
"remove_extra_whitespaces": true,
"remove_regex": null,
"remove_repeated_substrings": false,
"remove_substrings": null,
"unicode_normalization": null
}
},
{
"name": "DocumentCleaner-ImageCaption",
"type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id": "DocumentCleaner-ImageCaption_1739380168023",
"intro": "DocumentCleaner-ImageCaption",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"ascii_only": false,
"keep_id": false,
"remove_empty_lines": true,
"remove_extra_whitespaces": true,
"remove_regex": null,
"remove_repeated_substrings": false,
"remove_substrings": null,
"unicode_normalization": null
}
},
{
"name": "DocumentCleaner-ImageOCR",
"type": "haystack.components.preprocessors.document_cleaner.DocumentCleaner",
"component_id": "DocumentCleaner-ImageOCR_1739380168023",
"intro": "DocumentCleaner-ImageOCR",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"ascii_only": false,
"keep_id": false,
"remove_empty_lines": true,
"remove_extra_whitespaces": true,
"remove_regex": null,
"remove_repeated_substrings": false,
"remove_substrings": null,
"unicode_normalization": null
}
},
{
"name": "DocumentEmbedder",
"type": "haystack.components.embedders.openai_document_embedder.OpenAIDocumentEmbedder",
"component_id": "DocumentEmbedder_1739380168023",
"intro": "DocumentEmbedder",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"api_base_url": "https://api.siliconflow.cn/v1",
"api_key": {
"env_vars": [
"OPENAI_API_KEY"
],"strict": true,
"type": "env_var"
},
"batch_size": 32,
"dimensions": null,
"embedding_separator": "\n",
"meta_fields_to_embed": [],
"model": "BAAI/bge-m3",
"organization": null,
"prefix": "",
"progress_bar": true,
"suffix": ""
}
},
{
"name": "DocumentJoiner",
"type": "haystack.components.joiners.document_joiner.DocumentJoiner",
"component_id": "DocumentJoiner_1739380168023",
"intro": "DocumentJoiner",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"join_mode": "concatenate",
"sort_by_score": true,
"top_k": null,
"weights": null
}
},
{
"name": "DocumentJoiner-Result",
"type": "haystack.components.joiners.document_joiner.DocumentJoiner",
"component_id": "DocumentJoiner-Result_1739380168023",
"intro": "DocumentJoiner-Result",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"join_mode": "concatenate",
"sort_by_score": true,
"top_k": null,
"weights": null
}
},
{
"name": "DocumentSplitter",
"type": "haystack.components.preprocessors.document_splitter.DocumentSplitter",
"component_id": "DocumentSplitter_1739380168023",
"intro": "DocumentSplitter",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"split_by": "word",
"split_length": 200,
"split_overlap": 200,
"split_threshold": 0
}
},
{
"name": "DocumentWriter",
"type": "haystack.components.writers.document_writer.DocumentWriter",
"component_id": "DocumentWriter_1739380168023",
"intro": "DocumentWriter",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"document_store": {
"init_parameters": {
"connection_string": {
"env_vars": [
"DATABASE_SYNC_URI"
],
"strict": true,
"type": "env_var"
},
"embedding_dimension": 1024,
"keyword_index_name": "haystack_keyword_index",
"recreate_table": true,
"table_name": "embedding_results",
"vector_function": "cosine_similarity"
},
"type": "byoa.integrations.document_stores.mo_document_store.MOIDocumentStore"
},
"policy": "NONE"
}
},
{
"name": "FileRouterComponent",
"type": "haystack.components.routers.file_type_router.FileTypeRouter",
"component_id": "FileRouterComponent_1739380168023",
"intro": "FileRouterComponent",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},"init_parameters": {
"additional_mimetypes": null,
"mime_types": [
"text/plain",
"text/markdown",
"image/.*",
"application/pdf"
]
}
},
{
"name": "ImageCaptionToDocument",
"type": "byoa.integrations.components.converters.image_caption_to_document.ImageCaptionToDocument",
"component_id": "ImageCaptionToDocument_1739380168023",
"intro": "ImageCaptionToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {}
},
{
"name": "ImageOCRToDocument",
"type": "byoa.integrations.components.converters.image_ocr_to_document.ImageOCRToDocument",
"component_id": "ImageOCRToDocument_1739380168023",
"intro": "ImageOCRToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"model": "ucaslcl/GOT-OCR2_0",
"tokenizer": "stepfun-ai/GOT-OCR2_0"
}
},
{
"name": "ImageToDocument",
"type": "byoa.integrations.components.converters.image_to_document.ImageToDocument",
"component_id": "ImageToDocument_1739380168023",
"intro": "ImageToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {}
},
{
"name": "MagicPDFToDocument",
"type": "byoa.integrations.components.converters.magic_pdf_to_document.MagicPDFToDocument",
"component_id": "MagicPDFToDocument_1739380168023",
"intro": "MagicPDFToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {}
},
{
"name": "MarkdownToDocument",
"type": "haystack.components.converters.markdown.MarkdownToDocument",
"component_id": "MarkdownToDocument_1739380168023",
"intro": "MarkdownToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"progress_bar": false,
"table_to_single_line": false
}
},
{
"name": "MetadataRouter",
"type": "haystack.components.routers.metadata_router.MetadataRouter",
"component_id": "MetadataRouter_1739380168023",
"intro": "MetadataRouter",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"rules": {
"image": {
"conditions": [
{
"field": "meta.content_type",
"operator": "==",
"value": "image"
}
],
"operator": "AND"
},
"text": {
"conditions": [
{
"field": "meta.content_type",
"operator": "==",
"value": "text"
}],
"operator": "AND"
}
}
}
},
{
"name": "PythonExecutor",
"component_id": "PythonExecutor_1739380168023",
"intro": "PythonExecutor",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"python_code": ""
},
"type": "byoa.integrations.components.python_executor.PythonExecutor"
},
{
"name": "TextFileToDocument",
"type": "haystack.components.converters.txt.TextFileToDocument",
"component_id": "TextFileToDocument_1739380168023",
"intro": "TextFileToDocument",
"position": {
"x": 0,
"y": 0
},
"input_keys": {},
"output_keys": {},
"init_parameters": {
"encoding": "utf8"
}
}
],
"connections": [
{
"receiver": "TextFileToDocument.sources",
"sender": "FileRouterComponent.text/plain"
},
{
"receiver": "MarkdownToDocument.sources",
"sender": "FileRouterComponent.text/markdown"
},
{
"receiver": "ImageToDocument.sources",
"sender": "FileRouterComponent.image/.*"
},
{
"receiver": "MagicPDFToDocument.sources",
"sender": "FileRouterComponent.application/pdf"
},
{
"receiver": "DocumentJoiner.documents",
"sender": "TextFileToDocument.documents"
},
{
"receiver": "DocumentJoiner.documents",
"sender": "MarkdownToDocument.documents"
},
{
"receiver": "DocumentJoiner.documents",
"sender": "MagicPDFToDocument.documents"
},
{
"receiver": "DocumentJoiner.documents",
"sender": "ImageToDocument.documents"
},
{
"receiver": "MetadataRouter.documents",
"sender": "DocumentJoiner.documents"
},
{
"receiver": "DocumentCleaner.documents",
"sender": "MetadataRouter.text"
},
{
"receiver": "ImageOCRToDocument.documents",
"sender": "MetadataRouter.image"
},
{
"receiver": "ImageCaptionToDocument.documents",
"sender": "MetadataRouter.image"
},
{
"receiver": "DocumentSplitter.documents",
"sender": "DocumentCleaner.documents"
},
{
"receiver": "DocumentJoiner-Result.documents",
"sender": "DocumentSplitter.documents"
},
{
"receiver": "DocumentCleaner-ImageOCR.documents",
"sender": "ImageOCRToDocument.documents"
},
{
"receiver": "DocumentJoiner-Result.documents",
"sender": "DocumentCleaner-ImageOCR.documents"
},
{
"receiver": "DocumentCleaner-ImageCaption.documents",
"sender": "ImageCaptionToDocument.documents"
},
{
"receiver": "DocumentJoiner-Result.documents",
"sender": "DocumentCleaner-ImageCaption.documents"
},
{
"receiver": "PythonExecutor.documents",
"sender": "DocumentJoiner-Result.documents"
},
{
"receiver": "DocumentEmbedder.documents",
"sender": "PythonExecutor.documents"
},
{
"receiver": "DocumentWriter.documents",
"sender": "DocumentEmbedder.documents"
}],
"edges": [],
"extra_components": []
}
}
}
View the list of files related to a job
GET /byoa/api/v1/index_workflow_job/{job_id}/files
Output parameters:
| Parameters | Meaning |
|---|---|
| id | File id |
| file_type | File type, 2 is pdf. |
| file_status | File status |
Example:
import requests
import json
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow_job/0194f423-c2a7-7cc5-87ce-97fa942ac6ce/files"
headers = {
"user-id":"0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx",
"Access-Token": "xxxx",
"uid": "a6e11303-f4fd-46c0-b5ff-c774e96f64a3-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
}
response = requests.get(url, headers=headers)
print("Response Body:", json.dumps(response.json(), indent=4, ensure_ascii=False))
Return:
Response Body: {
"code": "ok",
"msg": "ok",
"data": {
"files": [
{
"id": "0194f423-c2a7-7ccc-a3da-732bafda96a3",
"file_name": "Dream of the Red Chamber (Public Book) Simplified Chinese horizontal row.pdf",
"file_type": 2,
"file_status": 2,
"error_message": "",
"start_time": "2025-02-11T08:32:52.000000+0000",
"end_time": "2025-02-11T08:37:53.000000+0000"
}
],
"total": 1,
"completed": 1,
"failed": 0,
"processing": 0,
"pending": 0
}
}
Reprocessing failed files
POST /byoa/api/v1/index_workflow_job/{job_id}/files
Input parameters:
| Parameters | Required | Meaning |
|---|---|---|
| files | Yes | List of ids of the files to reprocess |
Example:
import requests
import json
url = "https://freetier-01.cn-hangzhou.cluster.cn-dev.matrixone.tech/byoa/api/v1/index_workflow_job/0194f423-c2a7-7cc5-87ce-97fa942ac6ce/files"
headers = {
"user-id":"xxxx",
"Access-Token": "xxxx",
"uid": "011d4b66-ace5-4d58-88a4-bc76719acda5-0194dfaa-3eda-7ea5-b47c-b4f4f594xxxx:admin:accountadmin"
}
body = {
"files": ["0194f423-c2a7-7ccc-a3da-732bafda96a3"]
}
response = requests.post(url, json=body, headers=headers)
if response.status_code == 200:
print(response.json())
else:
print(f"Request failed, status code: {response.status_code}, error message: {response.text}")
Return:
{'code': 'ok', 'msg': 'ok', 'data': None}