[IMP] reference/extract: update documentation
task-3071317 closes odoo/documentation#3835 Signed-off-by: Antoine Vandevenne (anv) <anv@odoo.com>
This commit is contained in:
parent
b40fd5b284
commit
07eef81a95
File diff suppressed because it is too large
Load Diff
@ -7,95 +7,147 @@ import requests
|
|||||||
|
|
||||||
account_token = "integration_token" # Use your token
|
account_token = "integration_token" # Replace with your own account token
|
||||||
domain_name = "https://iap-extract.odoo.com"
|
domain_name = "https://iap-extract.odoo.com"
|
||||||
path_to_pdf = "/path/to/invoice_file"
|
path_to_pdf = "/path/to/your/pdf"
|
||||||
|
doc_type = "invoice" # One of "invoice", "expense" or "applicant"; selects the API route and the version below
|
||||||
|
|
||||||
API_VERSION = 120 # Do not change
|
# Version number sent as 'version' with each request, keyed by document type. Do not change
|
||||||
SUCCESS = 0
|
API_VERSION = {
|
||||||
NOT_READY = 1
|
'invoice': 122,
|
||||||
|
'expense': 132,
|
||||||
|
'applicant': 102,
|
||||||
|
}
|
||||||
|
|
||||||
def jsonrpc(path, params):
|
def extract_jsonrpc_call(path: str, params: dict):
    """Send a JSON-RPC 2.0 ``call`` request to the extract server.

    :param path: route appended to ``domain_name`` to build the URL
    :param params: value sent under the ``params`` key of the payload
    :return: the decoded JSON body of the HTTP response
    :raises requests.HTTPError: if the server answers with an HTTP error status
    """
    # Imported locally so the function does not rely on the file's import block.
    import uuid

    payload = {
        'jsonrpc': '2.0',
        'method': 'call',
        'params': params,
        # The id lets a client match a response to its request, so it must be
        # unique for each call; generate a fresh one instead of always sending 0.
        'id': uuid.uuid4().hex,
    }
    response = requests.post(domain_name + path, json=payload, timeout=10)
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
|
||||||
with open(path_to_pdf, "rb") as file:
|
def send_document_to_extract(doc_path: str):
    """Submit the file at ``doc_path`` to the parse route for ``doc_type``.

    The file is read in binary mode and sent base64-encoded as the single
    entry of the ``documents`` list, together with the account token and the
    API version matching ``doc_type``.

    :param doc_path: path of the document to upload
    :return: the response returned by ``extract_jsonrpc_call``
    """
    with open(doc_path, 'rb') as document:
        raw_bytes = document.read()
    # JSON cannot carry raw bytes, hence the base64 text form.
    encoded_doc = base64.b64encode(raw_bytes).decode()

    parse_params = {
        'account_token': account_token,
        'version': API_VERSION[doc_type],
        'documents': [encoded_doc],
    }
    return extract_jsonrpc_call(f"/api/extract/{doc_type}/1/parse", parse_params)
|
||||||
|
|
||||||
response = jsonrpc("/iap/invoice_extract/parse", params)
|
|
||||||
print("/parse call status: ", response['result']['status_msg'])
|
|
||||||
|
|
||||||
if response['result']['status_code'] != SUCCESS:
|
def get_result_from_extract(document_uuid: str, poll_interval: float = 5):
    """Poll the ``get_result`` route until the document is no longer processing.

    :param document_uuid: identifier of the document, sent as ``document_uuid``
    :param poll_interval: seconds to wait between two polls; defaults to 5,
        the delay that used to be hard-coded
    :return: the first response whose ``result.status`` is not ``'processing'``
    """
    params = {
        'version': API_VERSION[doc_type],
        'document_uuid': document_uuid,
    }
    endpoint = f"/api/extract/{doc_type}/1/get_result"
    response = extract_jsonrpc_call(endpoint, params)
    # Keep asking for as long as the server reports the document as processing.
    while response['result']['status'] == 'processing':
        print(f"Still processing... Retrying in {poll_interval} seconds")
        time.sleep(poll_interval)
        response = extract_jsonrpc_call(endpoint, params)
    return response
|
||||||
|
|
||||||
with open('results.txt', 'w') as outfile:
|
|
||||||
json.dump(response, outfile, indent=2)
|
|
||||||
print("\nResult saved in results.txt")
|
|
||||||
|
|
||||||
if response['result'][document_id]['status_code'] != SUCCESS:
|
def get_result_batch_from_extract(document_uuids: list, poll_interval: float = 5):
    """Get the results of multiple documents at once.

    This is a generator: nothing is requested until it is iterated. For each
    requested uuid, in order, it re-polls the batch route while that document
    is reported as ``'processing'`` and then yields the latest full batch
    response.

    :param document_uuids: identifiers of the documents, sent as ``document_uuids``
    :param poll_interval: seconds to wait between two polls; defaults to 5,
        the delay that used to be hard-coded
    :return: an iterator yielding one batch response per requested uuid
    """
    params = {
        'version': API_VERSION[doc_type],
        'document_uuids': document_uuids,
    }
    endpoint = f"/api/extract/{doc_type}/1/get_result_batch"
    response = extract_jsonrpc_call(endpoint, params)
    # Named document_uuid rather than uuid to avoid shadowing the stdlib module.
    for document_uuid in document_uuids:
        while response['result'][document_uuid]['status'] == 'processing':
            print(f"Still processing... Retrying in {poll_interval} seconds")
            time.sleep(poll_interval)
            response = extract_jsonrpc_call(endpoint, params)
        yield response
|
||||||
|
|
||||||
|
|
||||||
|
def validate_results(document_uuid: str, values=None):
    """Send the user-reviewed values of a document to the ``validate`` route.

    :param document_uuid: identifier of the document, sent as ``document_id``
    :param values: dict holding the correct values for the document, as
        reviewed by the user. Expected layout::

            {
                'total': {'content': float},
                'subtotal': {'content': float},
                'total_tax_amount': {'content': float},
                'date': {'content': str},      # YYYY-MM-DD
                'due_date': {'content': str},  # YYYY-MM-DD
                'invoice_id': {'content': str},
                'partner': {'content': str},
                'VAT_Number': {'content': str},
                'currency': {'content': str},
                'merged_lines': bool,
                'invoice_lines': {
                    'lines': [
                        {
                            'description': str,
                            'quantity': float,
                            'unit_price': float,
                            'product': str,
                            'taxes_amount': float,
                            'taxes': [
                                {
                                    'amount': float,
                                    'type': 'fixed' or 'percent',
                                    'price_include': bool,
                                },
                                ...
                            ],
                            'subtotal': float,
                            'total': float,
                        },
                        ...
                    ],
                },
            }

        When omitted, an illustrative sample following this layout is sent so
        that the example stays runnable; real integrations should pass their
        own reviewed values.
    :return: the response returned by ``extract_jsonrpc_call``
    """
    if values is None:
        # Concrete, JSON-serializable sample. The previous body used type
        # objects, ``...`` and ``"fixed"|"percent"`` as placeholders, which
        # raised a TypeError as soon as the function was called.
        values = {
            'total': {'content': 121.0},
            'subtotal': {'content': 100.0},
            'total_tax_amount': {'content': 21.0},
            'date': {'content': '2020-09-25'},
            'due_date': {'content': '2020-09-25'},
            'invoice_id': {'content': 'INV/2020/0001'},
            'partner': {'content': 'twinnta'},
            'VAT_Number': {'content': 'BE23252248420'},
            'currency': {'content': 'USD'},
            'merged_lines': False,
            'invoice_lines': {
                'lines': [
                    {
                        'description': 'Total TVA ',
                        'quantity': 1.0,
                        'unit_price': 100.0,
                        'product': 'Service',
                        'taxes_amount': 21.0,
                        'taxes': [
                            {
                                'amount': 21.0,
                                'type': 'percent',
                                'price_include': False,
                            },
                        ],
                        'subtotal': 100.0,
                        'total': 121.0,
                    },
                ],
            },
        }

    params = {
        'document_id': document_uuid,
        'values': values,
    }
    response = extract_jsonrpc_call(f"/api/extract/{doc_type}/1/validate", params)
    return response
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Step 1: submit the document for parsing and stop if it was refused.
    parse_response = send_document_to_extract(path_to_pdf)
    parse_result = parse_response['result']
    print("/parse call status: ", parse_result['status_msg'])
    if parse_result['status'] != 'success':
        sys.exit(1)

    document_uuid = parse_result['document_uuid']

    # Step 2: wait for the parsing to finish and fetch its outcome.
    response = get_result_from_extract(document_uuid)

    # Step 3: keep the raw response on disk before inspecting it.
    output_file = 'response.json'
    with open(output_file, 'w') as out:
        json.dump(response, out, indent=2)
        print("\nResult saved in", output_file)

    extract_result = response['result']
    print("/get_results call status: ", extract_result['status_msg'])
    if extract_result['status'] != 'success':
        sys.exit(1)

    # Step 4: show a few of the extracted fields of the first result.
    document_results = extract_result['results'][0]
    displayed_fields = (
        ("\nTotal:", 'total'),
        ("Subtotal:", 'subtotal'),
        ("Invoice id:", 'invoice_id'),
        ("Date:", 'date'),
    )
    for label, field in displayed_fields:
        print(label, document_results[field]['selected_value']['content'])
    print("...\n")

    # Step 5: send the validated values back to the service.
    response = validate_results(document_uuid)
    print("/validate call status: %s" % response['result']['status_msg'])
|
||||||
'total': {'content': 100.0},
|
|
||||||
'subtotal': {'content': 100.0},
|
|
||||||
'global_taxes': {'content': []},
|
|
||||||
'global_taxes_amount': {'content': 0.0},
|
|
||||||
'date': {'content': '2020-09-25'},
|
|
||||||
'due_date': {'content': '2020-09-25'},
|
|
||||||
'invoice_id': {'content': document_results['invoice_id']['selected_value']['content']},
|
|
||||||
'partner': {'content': 'twinnta'},
|
|
||||||
'VAT_Number': {'content': 'BE23252248420'},
|
|
||||||
'currency': {'content': 'USD'},
|
|
||||||
'merged_lines': False,
|
|
||||||
'invoice_lines': {'lines': [{'description': 'Total TVA ',
|
|
||||||
'quantity': 1.0,
|
|
||||||
'unit_price': 100.0,
|
|
||||||
'product': False,
|
|
||||||
'taxes_amount': 0.0,
|
|
||||||
'taxes': [],
|
|
||||||
'subtotal': 100.0,
|
|
||||||
'total': 100.0}]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
response = jsonrpc("/iap/invoice_extract/validate", params)
|
|
||||||
if response['result']['status_code'] == SUCCESS:
|
|
||||||
print("/validate call status: Success")
|
|
||||||
else:
|
|
||||||
print("/validate call status: wrong format")
|
|
||||||
|
Loading…
Reference in New Issue
Block a user