# -*- coding: utf-8 -*-
# Part of Odoo. See LICENSE file for full copyright and licensing details.

import re
from difflib import SequenceMatcher

# ------------------------------------------------------------
# Patch and comparison functions
# ------------------------------------------------------------

# Operations of a patch are separated by a line break; the "lines" of a
# content are the chunks obtained by splitting it on every "<".
OPERATION_SEPARATOR = "\n"
LINE_SEPARATOR = "<"

PATCH_OPERATION_LINE_AT = "@"
PATCH_OPERATION_CONTENT = ":"

PATCH_OPERATION_ADD = "+"
PATCH_OPERATION_REMOVE = "-"
PATCH_OPERATION_REPLACE = "R"

# Maps the difflib opcode tags to the patch operation characters.
PATCH_OPERATIONS = dict(
    insert=PATCH_OPERATION_ADD,
    delete=PATCH_OPERATION_REMOVE,
    replace=PATCH_OPERATION_REPLACE,
)

# Attributes stripped from every content before patching or comparing.
HTML_ATTRIBUTES_TO_REMOVE = [
    "data-last-history-steps",
]


def _parse_operation(operation):
    """Split a single patch operation into its components.

    :param string operation: one operation of a patch, e.g. ``R@3,5:<p>a``

    :return: tuple: ``(operation_type, start_index, end_index, lines)`` where
             ``lines`` is the list of content lines carried by the operation
             (empty for a removal) and ``end_index`` equals ``start_index``
             when the operation targets a single line
    :raise ValueError: if the line index part is not an integer
    """
    metadata, *patch_content_line = operation.split(LINE_SEPARATOR)
    metadata_split = metadata.split(PATCH_OPERATION_LINE_AT)
    operation_type = metadata_split[0]
    lines_index_range = metadata_split[1] if len(metadata_split) > 1 else ""
    # Drop the trailing PATCH_OPERATION_CONTENT char from the index range.
    lines_index_range = lines_index_range.split(PATCH_OPERATION_CONTENT)[0]
    indexes = lines_index_range.split(",")
    start_index = int(indexes[0])
    end_index = int(indexes[1]) if len(indexes) > 1 else start_index
    return operation_type, start_index, end_index, patch_content_line


def apply_patch(initial_content, patch):
    """Apply a patch (multiple operations) on a content.

    Each operation is a string with the following format:
    <operation_type>@<start_index>[,<end_index>][:<patch_text>*]
    patch format example:
        +@4:<p>ab</p><p>cd</p>
        +@4,15:<p>ef</p><p>gh</p>
        -@32
        -@125,129
        R@523:<b>sdf</b>

    :param string initial_content: the initial content to patch
    :param string patch: the patch to apply

    :return: string: the patched content
    """
    if patch == "":
        return initial_content

    # Remove break lines from the initial content to ensure they don't
    # interfere with the operations.
    initial_content = initial_content.replace("\n", "")
    initial_content = _remove_html_attribute(
        initial_content, HTML_ATTRIBUTES_TO_REMOVE
    )

    content = initial_content.split(LINE_SEPARATOR)

    # Apply operations in reverse order to preserve the indexes integrity.
    for operation in reversed(patch.split(OPERATION_SEPARATOR)):
        operation_type, start_index, end_index, lines = _parse_operation(
            operation
        )
        removes = operation_type in (
            PATCH_OPERATION_REMOVE,
            PATCH_OPERATION_REPLACE,
        )
        adds = operation_type in (PATCH_OPERATION_ADD, PATCH_OPERATION_REPLACE)

        if removes:
            # Remove every targeted line except the first one, which is kept
            # as an anchor for the insertion below.
            for index in range(end_index, start_index, -1):
                del content[index]

        if adds:
            # Insert lines from the last to the first so they end up in
            # their original order right after the anchor line.
            for line in reversed(lines):
                content.insert(start_index + 1, line)

        if removes:
            del content[start_index]

    return LINE_SEPARATOR.join(content)


HTML_TAG_ISOLATION_REGEX = r"^([^>]*>)(.*)$"
ADDITION_COMPARISON_REGEX = r"\1<added>\2</added>"
ADDITION_1ST_REPLACE_COMPARISON_REGEX = r"added>\2</added>"
DELETION_COMPARISON_REGEX = r"\1<removed>\2</removed>"
EMPTY_OPERATION_TAG = r"<(added|removed)><\/(added|removed)>"
SAME_TAG_REPLACE_FIXER = r"<\/added><(?:[^\/>]|(?:><))+><removed>"
UNNECESSARY_REPLACE_FIXER = (
    r"<added>([^<](?!<\/added>)*)<\/added>"
    r"<removed>([^<](?!<\/removed>)*)<\/removed>"
)


def generate_comparison(new_content, old_content):
    """Compare a content to an older content
    and generate a comparison html between both content.

    The text carried by the patch operations is wrapped in ``<added>`` tags
    and the text of the current content targeted by a removal or a
    replacement is wrapped in ``<removed>`` tags.

    :param string new_content: the current content
    :param string old_content: the old content

    :return: string: the comparison content
    """
    new_content = _remove_html_attribute(new_content, HTML_ATTRIBUTES_TO_REMOVE)
    old_content = _remove_html_attribute(old_content, HTML_ATTRIBUTES_TO_REMOVE)

    if new_content == old_content:
        return new_content

    patch = generate_patch(new_content, old_content)
    comparison = new_content.split(LINE_SEPARATOR)

    # Apply operations from the last to the first to preserve the indexes
    # integrity.
    for operation in reversed(patch.split(OPERATION_SEPARATOR)):
        operation_type, start_index, end_index, patch_content_line = (
            _parse_operation(operation)
        )

        # If the operation is a replace, we need to flag the changes that
        # will generate ghost opening tags if we don't ignore them.
        # This can happen when:
        # * A change concerns only html parameters.
        #   <p class="x">a</p> => <p class="y">a</p>
        # * An addition is made in a previously empty element opening tag.
        #   <p></p> => <p>a</p>
        if operation_type == PATCH_OPERATION_REPLACE:
            for i, line in enumerate(patch_content_line):
                current_index = start_index + i
                if current_index > end_index:
                    break

                current_line = comparison[current_index]
                current_line_tag = current_line.split(">")[0]
                line_tag = line.split(">")[0]
                # `endswith` instead of `[-1]`: the line is empty when the
                # content starts with "<" and the operation targets index 0.
                if current_line.endswith(">") and (
                    current_line_tag == line_tag
                    or current_line_tag.split(" ")[0] == line_tag.split(" ")[0]
                ):
                    comparison[current_index] = "delete_me>"

        # Lines are inserted from the last to the first to preserve the
        # indexes integrity.
        patch_content_line.reverse()

        if operation_type in (PATCH_OPERATION_REMOVE, PATCH_OPERATION_REPLACE):
            for index in range(end_index, start_index - 1, -1):
                deletion_flagged_comparison = re.sub(
                    HTML_TAG_ISOLATION_REGEX,
                    DELETION_COMPARISON_REGEX,
                    comparison[index],
                )
                # Only use this line if it doesn't generate an empty
                # <removed> tag.
                if not re.search(
                    EMPTY_OPERATION_TAG, deletion_flagged_comparison
                ):
                    comparison[index] = deletion_flagged_comparison

        if operation_type == PATCH_OPERATION_ADD:
            for line in patch_content_line:
                addition_flagged_line = re.sub(
                    HTML_TAG_ISOLATION_REGEX, ADDITION_COMPARISON_REGEX, line
                )
                # Fall back on the raw line rather than inserting an empty
                # <added> tag.
                if re.search(EMPTY_OPERATION_TAG, addition_flagged_line):
                    comparison.insert(start_index + 1, line)
                else:
                    comparison.insert(start_index + 1, addition_flagged_line)

        if operation_type == PATCH_OPERATION_REPLACE:
            for line in patch_content_line:
                addition_flagged_line = re.sub(
                    HTML_TAG_ISOLATION_REGEX, ADDITION_COMPARISON_REGEX, line
                )
                if not re.search(EMPTY_OPERATION_TAG, addition_flagged_line):
                    comparison.insert(start_index, addition_flagged_line)
                elif (
                    line.split(">")[0] != comparison[start_index].split(">")[0]
                ):
                    # A text-less line is only kept when its tag differs from
                    # the one currently at the insertion point.
                    comparison.insert(start_index, line)

    final_comparison = LINE_SEPARATOR.join(comparison)

    # We can remove all the opening tags which are located between the end of
    # an <added> tag and the start of a <removed> tag, because this should
    # never happen as the <added> and <removed> tags should always be near
    # each other. This can happen when the new container tag had a parameter
    # change.
    final_comparison = re.sub(
        SAME_TAG_REPLACE_FIXER, "</added><removed>", final_comparison
    )

    # Remove all the <delete_me> markers.
    final_comparison = final_comparison.replace(r"<delete_me>", "")

    # This fixes the issue of unnecessary replace tags.
    # ex: <added>abc</added><removed>abc</removed> -> abc
    # This can occur when the new content is the same as the old content and
    # their container tags are the same but the tags parameters are different.
    for match in re.finditer(UNNECESSARY_REPLACE_FIXER, final_comparison):
        if match.group(1) == match.group(2):
            final_comparison = final_comparison.replace(
                match.group(0), match.group(1)
            )

    return final_comparison


def _format_line_index(start, end):
    """Format the line index to be used in a patch operation.

    An empty range (``start == end``, i.e. a pure insertion) is anchored on
    the line preceding ``start``.

    :param start: the start index
    :param end: the end index (exclusive)
    :return: string: ``@<start>`` or ``@<start>,<last>``
    """
    length = end - start
    if not length:
        start -= 1
    if length <= 1:
        return "{}{}".format(PATCH_OPERATION_LINE_AT, start)
    return "{}{},{}".format(PATCH_OPERATION_LINE_AT, start, start + length - 1)


def _patch_generator(new_content, old_content):
    """Generate a patch (multiple operations) between two contents.

    Each operation is a string with the following format:
    <operation_type>@<start_index>[,<end_index>][:<patch_text>*]
    patch format example:
        +@4:<p>ab</p><p>cd</p>
        +@4,15:<p>ef</p><p>gh</p>
        -@32
        -@125,129
        R@523:<b>sdf</b>

    :param string new_content: the new content
    :param string old_content: the old content

    :return: generator of strings: the operations needed to reverse the new
             content to the old content
    """
    # Remove break lines in contents to ensure they don't interfere with
    # operations.
    new_content = new_content.replace("\n", "")
    old_content = old_content.replace("\n", "")

    new_content_lines = new_content.split(LINE_SEPARATOR)
    old_content_lines = old_content.split(LINE_SEPARATOR)

    matcher = SequenceMatcher(
        None, new_content_lines, old_content_lines, False
    )
    for group in matcher.get_grouped_opcodes(0):
        patch_content_line = []
        first, last = group[0], group[-1]
        patch_operation = _format_line_index(first[1], last[2])

        # Removals carry no content: only the operation type is prefixed.
        for tag, _, _, _, _ in group:
            if tag == "delete":
                patch_operation = PATCH_OPERATIONS[tag] + patch_operation

        # Insertions and replacements also carry the old content lines.
        for tag, _, _, j1, j2 in group:
            if tag in {"insert", "replace"}:
                patch_operation = PATCH_OPERATIONS[tag] + patch_operation
                patch_content_line.extend(old_content_lines[j1:j2])

        if patch_content_line:
            patch_content = LINE_SEPARATOR + LINE_SEPARATOR.join(
                patch_content_line
            )
            yield patch_operation + PATCH_OPERATION_CONTENT + patch_content
        else:
            yield patch_operation


def generate_patch(new_content, old_content):
    """Generate the patch that reverses ``new_content`` to ``old_content``.

    The attributes listed in HTML_ATTRIBUTES_TO_REMOVE are stripped from both
    contents before they are compared.

    :param string new_content: the new content
    :param string old_content: the old content

    :return: string: the operations joined with OPERATION_SEPARATOR (an empty
             string when no operation is needed)
    """
    new_content = _remove_html_attribute(new_content, HTML_ATTRIBUTES_TO_REMOVE)
    old_content = _remove_html_attribute(old_content, HTML_ATTRIBUTES_TO_REMOVE)

    return OPERATION_SEPARATOR.join(_patch_generator(new_content, old_content))


def _remove_html_attribute(html_content, attributes_to_remove):
    """Remove every ``name="value"`` occurrence of the given attributes.

    :param string html_content: the content to clean
    :param list attributes_to_remove: the literal names of the attributes

    :return: string: the content without the attributes
    """
    for attribute in attributes_to_remove:
        # The attribute name is a literal: escape it so that it cannot be
        # interpreted as a regular expression.
        html_content = re.sub(
            r' {}="[^"]*"'.format(re.escape(attribute)), "", html_content
        )

    return html_content