[IMP] reference/extract: update documentation

task-3071317

closes odoo/documentation#4224

X-original-commit: 07eef81a95
Signed-off-by: Antoine Vandevenne (anv) <anv@odoo.com>
Signed-off-by: Arthur Gossuin (goa) <goa@odoo.com>
This commit is contained in:
Artygo8 2023-03-16 14:27:36 +00:00 committed by Antoine Vandevenne (anv)
parent 56cf4288c7
commit ef4c3555bc
2 changed files with 857 additions and 530 deletions

File diff suppressed because it is too large Load Diff

View File

@ -7,95 +7,147 @@ import requests
account_token = "integration_token" # Use your token account_token = "integration_token" # Use your token
domain_name = "https://iap-extract.odoo.com" domain_name = "https://iap-extract.odoo.com"
path_to_pdf = "/path/to/invoice_file" path_to_pdf = "/path/to/your/pdf"
doc_type = "invoice" # invoice, expense or applicant
API_VERSION = 120 # Do not change # Do not change
SUCCESS = 0 API_VERSION = {
NOT_READY = 1 'invoice': 122,
'expense': 132,
'applicant': 102,
}
def jsonrpc(path, params): def extract_jsonrpc_call(path: str, params: dict):
payload = { payload = {
'jsonrpc': '2.0', 'jsonrpc': '2.0',
'method': 'call', 'method': 'call',
'params': params, 'params': params,
'id': 0, 'id': 0, # This should be unique for each call
} }
req = requests.post(domain_name+path, json=payload, timeout=10) response = requests.post(domain_name + path, json=payload, timeout=10)
req.raise_for_status() response.raise_for_status()
resp = req.json() json_response = response.json()
return resp return json_response
with open(path_to_pdf, "rb") as file: def send_document_to_extract(doc_path: str):
with open(doc_path, 'rb') as f:
encoded_doc = base64.b64encode(f.read()).decode()
params = { params = {
'account_token': account_token, 'account_token': account_token,
'version': API_VERSION, 'version': API_VERSION[doc_type],
'documents': [base64.b64encode(file.read()).decode('ascii')], 'documents': [encoded_doc],
} }
response = extract_jsonrpc_call(f"/api/extract/{doc_type}/1/parse", params)
return response
response = jsonrpc("/iap/invoice_extract/parse", params)
print("/parse call status: ", response['result']['status_msg'])
if response['result']['status_code'] != SUCCESS: def get_result_from_extract(document_uuid: str):
sys.exit(1) params = {
'version': API_VERSION[doc_type],
'document_uuid': document_uuid,
}
endpoint = f"/api/extract/{doc_type}/1/get_result"
response = extract_jsonrpc_call(endpoint, params)
while response['result']['status'] == 'processing':
print("Still processing... Retrying in 5 seconds")
time.sleep(5)
response = extract_jsonrpc_call(endpoint, params)
return response
# You received an id that you can use to poll the server to get the result of the ocr when it will be ready
document_id = response['result']['document_id']
params = {
'version': API_VERSION,
'document_ids': [document_id], # you can request the results of multiple documents at once if wanted
}
response = jsonrpc("/iap/invoice_extract/get_results", params) def get_result_batch_from_extract(document_uuids: list):
document_id = str(document_id) # /get_results expects a string despite the fact that the returned document_id is a int """Get the results of multiple documents at once."""
params = {
'version': API_VERSION[doc_type],
'document_uuids': document_uuids,
}
endpoint = f"/api/extract/{doc_type}/1/get_result_batch"
response = extract_jsonrpc_call(endpoint, params)
for uuid in document_uuids:
while response['result'][uuid]['status'] == 'processing':
print("Still processing... Retrying in 5 seconds")
time.sleep(5)
response = extract_jsonrpc_call(endpoint, params)
yield response
while response['result'][document_id]['status_code'] == NOT_READY: # 1 is the status code indicating that the server is still processing the document
print("Still processing... Retrying in 5 seconds")
time.sleep(5)
response = jsonrpc("/iap/invoice_extract/get_results", params)
with open('results.txt', 'w') as outfile: def validate_results(document_uuid: str):
json.dump(response, outfile, indent=2) # This is an example of how to validate the results of the parsing
print("\nResult saved in results.txt") # These values should be the correct values for the document reviewed by the user
params = {
if response['result'][document_id]['status_code'] != SUCCESS: 'document_id': document_uuid,
print(response['result'][document_id]['status_msg']) # if it isn't a success, print the error message 'values': {
sys.exit(1) 'total': {'content': float},
'subtotal': {'content': float},
document_results = response['result'][document_id]['results'][0] 'total_tax_amount': {'content': float},
print("\nTotal:", document_results['total']['selected_value']['content']) 'date': {'content': str}, # YYYY-MM-DD
print("Subtotal:", document_results['subtotal']['selected_value']['content']) 'due_date': {'content': str}, # YYYY-MM-DD
print("Invoice id:", document_results['invoice_id']['selected_value']['content']) 'invoice_id': {'content': str},
print("Date:", document_results['date']['selected_value']['content']) 'partner': {'content': str},
print("...\n") 'VAT_Number': {'content': str},
'currency': {'content': str},
params = { 'merged_lines': bool,
'document_id': document_id, 'invoice_lines': {
'values': { 'lines': [
'total': {'content': 100.0}, {
'subtotal': {'content': 100.0}, 'description': str,
'global_taxes': {'content': []}, 'quantity': float,
'global_taxes_amount': {'content': 0.0}, 'unit_price': float,
'date': {'content': '2020-09-25'}, 'product': str,
'due_date': {'content': '2020-09-25'}, 'taxes_amount': float,
'invoice_id': {'content': document_results['invoice_id']['selected_value']['content']}, 'taxes': [
'partner': {'content': 'twinnta'}, {
'VAT_Number': {'content': 'BE23252248420'}, "amount": float,
'currency': {'content': 'USD'}, "type": "fixed"|"percent",
'merged_lines': False, "price_include": bool
'invoice_lines': {'lines': [{'description': 'Total TVA ', },
'quantity': 1.0, ...
'unit_price': 100.0, ],
'product': False, 'subtotal': float,
'taxes_amount': 0.0, 'total': float,
'taxes': [], },
'subtotal': 100.0, ...
'total': 100.0}] ],
}
} }
} }
} response = extract_jsonrpc_call(f"/api/extract/{doc_type}/1/validate", params)
response = jsonrpc("/iap/invoice_extract/validate", params) return response
if response['result']['status_code'] == SUCCESS:
print("/validate call status: Success")
else: if __name__ == '__main__':
print("/validate call status: wrong format")
# Parse the document
response = send_document_to_extract(path_to_pdf)
print("/parse call status: ", response['result']['status_msg'])
if response['result']['status'] != 'success':
sys.exit(1)
document_uuid = response['result']['document_uuid']
# Get the results of the parsing
response = get_result_from_extract(document_uuid)
# Write the response to a file
output_file = 'response.json'
with open(output_file, 'w') as f:
json.dump(response, f, indent=2)
print("\nResult saved in", output_file)
print("/get_results call status: ", response['result']['status_msg'])
if response['result']['status'] != 'success':
sys.exit(1)
document_results = response['result']['results'][0]
print("\nTotal:", document_results['total']['selected_value']['content'])
print("Subtotal:", document_results['subtotal']['selected_value']['content'])
print("Invoice id:", document_results['invoice_id']['selected_value']['content'])
print("Date:", document_results['date']['selected_value']['content'])
print("...\n")
# Validate the results
response = validate_results(document_uuid)
print("/validate call status: %s" % response['result']['status_msg'])