documentation/content/developer/reference/extract_api/implementation.py
Artygo8 549e06ce05 [IMP] reference/extract: update documentation
task-3071317

closes odoo/documentation#4223

X-original-commit: 07eef81a95
Signed-off-by: Antoine Vandevenne (anv) <anv@odoo.com>
Signed-off-by: Arthur Gossuin (goa) <goa@odoo.com>
2023-04-21 16:14:02 +02:00

154 lines
5.0 KiB
Python

import base64
import json
import sys
import time
import requests
account_token = "integration_token" # Use your token
domain_name = "https://iap-extract.odoo.com"
path_to_pdf = "/path/to/your/pdf"
doc_type = "invoice" # invoice, expense or applicant
# Do not change
API_VERSION = {
'invoice': 122,
'expense': 132,
'applicant': 102,
}
def extract_jsonrpc_call(path: str, params: dict):
payload = {
'jsonrpc': '2.0',
'method': 'call',
'params': params,
'id': 0, # This should be unique for each call
}
response = requests.post(domain_name + path, json=payload, timeout=10)
response.raise_for_status()
json_response = response.json()
return json_response
def send_document_to_extract(doc_path: str):
with open(doc_path, 'rb') as f:
encoded_doc = base64.b64encode(f.read()).decode()
params = {
'account_token': account_token,
'version': API_VERSION[doc_type],
'documents': [encoded_doc],
}
response = extract_jsonrpc_call(f"/api/extract/{doc_type}/1/parse", params)
return response
def get_result_from_extract(document_uuid: str):
params = {
'version': API_VERSION[doc_type],
'document_uuid': document_uuid,
}
endpoint = f"/api/extract/{doc_type}/1/get_result"
response = extract_jsonrpc_call(endpoint, params)
while response['result']['status'] == 'processing':
print("Still processing... Retrying in 5 seconds")
time.sleep(5)
response = extract_jsonrpc_call(endpoint, params)
return response
def get_result_batch_from_extract(document_uuids: list):
"""Get the results of multiple documents at once."""
params = {
'version': API_VERSION[doc_type],
'document_uuids': document_uuids,
}
endpoint = f"/api/extract/{doc_type}/1/get_result_batch"
response = extract_jsonrpc_call(endpoint, params)
for uuid in document_uuids:
while response['result'][uuid]['status'] == 'processing':
print("Still processing... Retrying in 5 seconds")
time.sleep(5)
response = extract_jsonrpc_call(endpoint, params)
yield response
def validate_results(document_uuid: str):
# This is an example of how to validate the results of the parsing
# These values should be the correct values for the document reviewed by the user
params = {
'document_id': document_uuid,
'values': {
'total': {'content': float},
'subtotal': {'content': float},
'total_tax_amount': {'content': float},
'date': {'content': str}, # YYYY-MM-DD
'due_date': {'content': str}, # YYYY-MM-DD
'invoice_id': {'content': str},
'partner': {'content': str},
'VAT_Number': {'content': str},
'currency': {'content': str},
'merged_lines': bool,
'invoice_lines': {
'lines': [
{
'description': str,
'quantity': float,
'unit_price': float,
'product': str,
'taxes_amount': float,
'taxes': [
{
"amount": float,
"type": "fixed"|"percent",
"price_include": bool
},
...
],
'subtotal': float,
'total': float,
},
...
],
}
}
}
response = extract_jsonrpc_call(f"/api/extract/{doc_type}/1/validate", params)
return response
if __name__ == '__main__':
# Parse the document
response = send_document_to_extract(path_to_pdf)
print("/parse call status: ", response['result']['status_msg'])
if response['result']['status'] != 'success':
sys.exit(1)
document_uuid = response['result']['document_uuid']
# Get the results of the parsing
response = get_result_from_extract(document_uuid)
# Write the response to a file
output_file = 'response.json'
with open(output_file, 'w') as f:
json.dump(response, f, indent=2)
print("\nResult saved in", output_file)
print("/get_results call status: ", response['result']['status_msg'])
if response['result']['status'] != 'success':
sys.exit(1)
document_results = response['result']['results'][0]
print("\nTotal:", document_results['total']['selected_value']['content'])
print("Subtotal:", document_results['subtotal']['selected_value']['content'])
print("Invoice id:", document_results['invoice_id']['selected_value']['content'])
print("Date:", document_results['date']['selected_value']['content'])
print("...\n")
# Validate the results
response = validate_results(document_uuid)
print("/validate call status: %s" % response['result']['status_msg'])