09-smart_bookmark_filler.py-白红宇

09-smart_bookmark_filler.py

阅读量：797 次

发布时间：2023-03-23

本文共 9129 字，大约阅读时间需要 30 分钟。

class DocxImageInserter:
    """Insert pictures into a .docx file at named bookmark positions.

    The document is unzipped into a working directory, ``word/document.xml``
    and its relationship part are edited with lxml, and the directory is
    zipped back into a .docx file.

    The class expects these names to be available in module scope:
    ``os``, ``shutil``, ``uuid``, ``ZipFile`` and ``ZIP_DEFLATED`` (from
    ``zipfile``), ``etree`` (from ``lxml``) and ``Image`` (from ``PIL``).

    NOTE: the working directory is the fixed relative path ``temp_docx``;
    any existing directory of that name is deleted before use.
    """

    # Namespace used by OPC relationship parts (``*.rels``).
    _PKG_RELS_NS = 'http://schemas.openxmlformats.org/package/2006/relationships'
    # Namespace used by ``[Content_Types].xml``.
    _CONTENT_TYPES_NS = 'http://schemas.openxmlformats.org/package/2006/content-types'
    # Content types for extensions whose MIME subtype differs from the
    # extension itself; anything else falls back to ``image/<extension>``.
    _IMAGE_CONTENT_TYPES = {
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'tif': 'image/tiff',
        'tiff': 'image/tiff',
        'svg': 'image/svg+xml',
        'emf': 'image/x-emf',
        'wmf': 'image/x-wmf',
    }

    def __init__(self, docx_path):
        """Remember the source document and derive the working paths.

        Args:
            docx_path: Path of the .docx file to read (and to overwrite when
                no output path is given later).
        """
        self.docx_path = docx_path
        self.temp_dir = 'temp_docx'
        self.namespaces = {
            'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
            'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
            'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
            'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
            'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
        }
        self.media_dir = os.path.join(self.temp_dir, 'word/media')
        self.rels_path = os.path.join(self.temp_dir, 'word/_rels/document.xml.rels')
        self.document_path = os.path.join(self.temp_dir, 'word/document.xml')
        # Next free numeric suffix for relationship ids ("rId<n>").
        self.next_rid = 1
        # Next free id for wp:docPr, so every inserted drawing gets its own.
        self.next_docpr_id = 1

    def _qn(self, prefix, local_name):
        """Return the Clark-notation name ``{namespace-uri}local_name``."""
        return f'{{{self.namespaces[prefix]}}}{local_name}'

    def insert_images_at_bookmarks(self, image_dict, output_path=None):
        """Insert the given images at their bookmarks and write the result.

        Args:
            image_dict: Mapping of bookmark name to image file path.
            output_path: Destination .docx path; when falsy the source
                document is overwritten.

        The working directory is removed even when a step fails.
        """
        try:
            self._prepare_working_directory()
            tree, relationships = self._parse_document_files()
            self._process_bookmarks(tree, relationships, image_dict)
            self._save_modified_files(tree, relationships)
            self._repack_docx(output_path)
            print(f"图片已成功插入到书签位置，保存到: {output_path or self.docx_path}")
        finally:
            self._cleanup()

    def _prepare_working_directory(self):
        """Recreate the working directory and unzip the document into it."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
        os.makedirs(self.temp_dir)
        with ZipFile(self.docx_path) as z:
            z.extractall(self.temp_dir)
        os.makedirs(self.media_dir, exist_ok=True)

    def _parse_document_files(self):
        """Parse the main document and its relationships part.

        Returns:
            A ``(tree, relationships)`` tuple: the root element of
            ``word/document.xml`` and the root ``Relationships`` element
            (newly created when the document has no relationships part).

        Also seeds ``next_rid`` and ``next_docpr_id`` from the ids already
        present so that newly generated ids do not collide with them.
        """
        parser = etree.XMLParser(remove_blank_text=True)
        with open(self.document_path, 'rb') as f:
            tree = etree.fromstring(f.read(), parser)

        if os.path.exists(self.rels_path):
            rels_parser = etree.XMLParser(remove_blank_text=True)
            relationships = etree.parse(self.rels_path, rels_parser).getroot()
        else:
            # Create the root in the package relationships namespace, declared
            # as the default namespace, instead of faking an "xmlns" attribute.
            relationships = etree.Element(
                f'{{{self._PKG_RELS_NS}}}Relationships',
                nsmap={None: self._PKG_RELS_NS},
            )

        # "Id" is an un-namespaced attribute, so no prefix map is needed.
        existing_rids = [
            int(rid[3:])
            for rid in relationships.xpath('//@Id')
            if rid.startswith('rId') and rid[3:].isdigit()
        ]
        self.next_rid = max(existing_rids) + 1 if existing_rids else 1

        existing_docpr_ids = [
            int(value)
            for value in tree.xpath('//wp:docPr/@id', namespaces=self.namespaces)
            if value.isdigit()
        ]
        self.next_docpr_id = max(existing_docpr_ids) + 1 if existing_docpr_ids else 1
        return tree, relationships

    def _process_bookmarks(self, tree, relationships, image_dict):
        """Insert an image at every bookmark whose name is in ``image_dict``.

        Bookmarks whose image path is not an existing file are skipped with
        a printed warning.
        """
        name_attr = self._qn('w', 'name')
        bookmark_starts = tree.xpath('//w:bookmarkStart', namespaces=self.namespaces)
        for bookmark_start in bookmark_starts:
            bookmark_name = bookmark_start.get(name_attr)
            if bookmark_name not in image_dict:
                continue
            image_path = image_dict[bookmark_name]
            if not os.path.isfile(image_path):
                print(f"警告: 书签 '{bookmark_name}' 对应的图片文件不存在，已跳过: {image_path}")
                continue
            self._delete_text_before_bookmark(tree, bookmark_start)
            self._insert_image_at_bookmark(tree, relationships, bookmark_start, image_path)

    def _delete_text_before_bookmark(self, tree, bookmark_start):
        """Remove text runs adjacent to the bookmark start.

        Walks the bookmark's siblings backwards and forwards, in each
        direction stopping at the first ``w:bookmarkEnd``, and removes every
        ``w:r`` sibling that contains a ``w:t`` element.
        """
        parent = bookmark_start.getparent()
        if parent is None:
            return

        end_tag = self._qn('w', 'bookmarkEnd')
        run_tag = self._qn('w', 'r')
        text_path = './/' + self._qn('w', 't')

        # Collect first, remove afterwards, so the tree is not modified while
        # its siblings are being iterated.
        runs_to_remove = []
        directions = (
            bookmark_start.itersiblings(preceding=True),
            bookmark_start.itersiblings(),
        )
        for siblings in directions:
            for element in siblings:
                if element.tag == end_tag:
                    break
                if element.tag == run_tag and element.find(text_path) is not None:
                    runs_to_remove.append(element)

        for run in runs_to_remove:
            parent.remove(run)

    def _insert_image_at_bookmark(self, tree, relationships, bookmark_start, image_path):
        """Copy the image into the package and place it right after the bookmark.

        The drawing is wrapped in a ``w:r`` run; when the bookmark is not a
        direct child of a paragraph the run is additionally wrapped in a new
        ``w:p`` paragraph.
        """
        # Lower-case the extension so it matches the content-type declaration.
        extension = os.path.splitext(image_path)[1].lower()
        image_name = f'image_{uuid.uuid4().hex}{extension}'
        rel_id = f'rId{self.next_rid}'
        self.next_rid += 1

        # Build the XML first: this opens the image and fails early on a
        # file that cannot be read as an image.
        drawing = self._create_image_xml(rel_id, image_name, image_path)
        shutil.copy(image_path, os.path.join(self.media_dir, image_name))
        self._ensure_image_content_type(extension)

        run = etree.Element(self._qn('w', 'r'))
        run.append(drawing)

        parent = bookmark_start.getparent()
        if parent is not None and parent.tag == self._qn('w', 'p'):
            parent.insert(parent.index(bookmark_start) + 1, run)
        else:
            paragraph = etree.Element(self._qn('w', 'p'))
            paragraph.append(run)
            if parent is not None:
                parent.insert(parent.index(bookmark_start) + 1, paragraph)
            else:
                body = tree.xpath('//w:body', namespaces=self.namespaces)[0]
                section_props = body.find(self._qn('w', 'sectPr'))
                if section_props is not None:
                    # Keep the section properties as the last body child.
                    section_props.addprevious(paragraph)
                else:
                    body.append(paragraph)

        self._add_image_relationship(relationships, rel_id, f'media/{image_name}')

    def _ensure_image_content_type(self, extension):
        """Declare the image extension in ``[Content_Types].xml`` if missing.

        Does nothing when the extension is empty, the content-types part is
        absent, or a ``Default`` entry for the extension already exists.
        """
        ext = extension.lstrip('.').lower()
        content_types_path = os.path.join(self.temp_dir, '[Content_Types].xml')
        if not ext or not os.path.exists(content_types_path):
            return

        types_root = etree.parse(content_types_path).getroot()
        default_tag = f'{{{self._CONTENT_TYPES_NS}}}Default'
        declared = {
            (entry.get('Extension') or '').lower()
            for entry in types_root.iter(default_tag)
        }
        if ext in declared:
            return

        content_type = self._IMAGE_CONTENT_TYPES.get(ext, f'image/{ext}')
        new_default = etree.Element(
            default_tag, {'Extension': ext, 'ContentType': content_type}
        )
        types_root.insert(0, new_default)
        with open(content_types_path, 'wb') as f:
            f.write(etree.tostring(types_root, encoding='UTF-8', xml_declaration=True))

    def _create_image_xml(self, rel_id, image_name, image_path,
                          max_width_px=200, max_height_px=100):
        """Build the ``w:drawing`` element for an inline picture.

        Args:
            rel_id: Relationship id that points at the image part.
            image_name: Name recorded on the drawing's non-visual properties.
            image_path: Image file, opened only to read its pixel size.
            max_width_px: Width of the box the picture is scaled to fit.
            max_height_px: Height of the box the picture is scaled to fit.

        Returns:
            The ``w:drawing`` element (not yet wrapped in a run).

        Raises:
            ValueError: If the image reports a non-positive width or height.
        """
        with Image.open(image_path) as img:
            width_px, height_px = img.size
        if width_px <= 0 or height_px <= 0:
            raise ValueError(f'Image has no drawable area: {image_path}')

        # Scale uniformly so the picture fits the box and keeps its aspect ratio.
        scale_ratio = min(max_width_px / width_px, max_height_px / height_px)
        emu_width, emu_height = self._convert_pixels_to_emu(
            width_px * scale_ratio, height_px * scale_ratio
        )

        docpr_id = self.next_docpr_id
        self.next_docpr_id += 1

        # Declaring the prefixes here keeps the serialised XML readable
        # (w:, wp:, a:, pic:, r:) instead of auto-generated ns0/ns1 prefixes.
        drawing = etree.Element(self._qn('w', 'drawing'), nsmap=self.namespaces)
        inline = etree.SubElement(
            drawing, self._qn('wp', 'inline'),
            {'distT': "0", 'distB': "0", 'distL': "0", 'distR': "0"},
        )
        etree.SubElement(inline, self._qn('wp', 'extent'),
                         {'cx': str(emu_width), 'cy': str(emu_height)})
        etree.SubElement(inline, self._qn('wp', 'effectExtent'),
                         {'l': "0", 't': "0", 'r': "0", 'b': "0"})
        etree.SubElement(inline, self._qn('wp', 'docPr'),
                         {'id': str(docpr_id), 'name': image_name, 'descr': ""})
        frame_props = etree.SubElement(inline, self._qn('wp', 'cNvGraphicFramePr'))
        etree.SubElement(frame_props, self._qn('a', 'graphicFrameLocks'),
                         {'noChangeAspect': "1"})

        graphic = etree.SubElement(inline, self._qn('a', 'graphic'))
        graphic_data = etree.SubElement(
            graphic, self._qn('a', 'graphicData'),
            {'uri': "http://schemas.openxmlformats.org/drawingml/2006/picture"},
        )
        pic = etree.SubElement(graphic_data, self._qn('pic', 'pic'))

        nv_pic_pr = etree.SubElement(pic, self._qn('pic', 'nvPicPr'))
        etree.SubElement(nv_pic_pr, self._qn('pic', 'cNvPr'),
                         {'id': "0", 'name': image_name})
        etree.SubElement(nv_pic_pr, self._qn('pic', 'cNvPicPr'))

        blip_fill = etree.SubElement(pic, self._qn('pic', 'blipFill'))
        # The embed attribute must live in the relationships namespace (r:embed).
        etree.SubElement(blip_fill, self._qn('a', 'blip'),
                         {self._qn('r', 'embed'): rel_id})
        stretch = etree.SubElement(blip_fill, self._qn('a', 'stretch'))
        etree.SubElement(stretch, self._qn('a', 'fillRect'))

        sp_pr = etree.SubElement(pic, self._qn('pic', 'spPr'))
        xfrm = etree.SubElement(sp_pr, self._qn('a', 'xfrm'))
        etree.SubElement(xfrm, self._qn('a', 'off'), {'x': "0", 'y': "0"})
        etree.SubElement(xfrm, self._qn('a', 'ext'),
                         {'cx': str(emu_width), 'cy': str(emu_height)})
        preset_geometry = etree.SubElement(sp_pr, self._qn('a', 'prstGeom'),
                                           {'prst': "rect"})
        # The adjust-value list belongs inside the preset geometry.
        etree.SubElement(preset_geometry, self._qn('a', 'avLst'))
        return drawing

    def _convert_pixels_to_emu(self, width_px, height_px, dpi=96):
        """Convert a pixel size to English Metric Units (914400 per inch).

        Args:
            width_px: Width in pixels.
            height_px: Height in pixels.
            dpi: Pixels per inch assumed for the conversion.

        Returns:
            ``(width_emu, height_emu)`` truncated to integers.
        """
        emu_per_px = 914400 / dpi
        return int(width_px * emu_per_px), int(height_px * emu_per_px)

    def _add_image_relationship(self, relationships, rel_id, target):
        """Append an image relationship with the given id and target."""
        etree.SubElement(relationships, f'{{{self._PKG_RELS_NS}}}Relationship', {
            'Id': rel_id,
            'Type': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image',
            'Target': target
        })

    def _save_modified_files(self, tree, relationships):
        """Write the document XML, and the relationships part when non-empty."""
        with open(self.document_path, 'wb') as f:
            f.write(etree.tostring(tree, pretty_print=True, encoding='UTF-8', xml_declaration=True))
        if len(relationships) > 0:
            # The _rels directory is missing when the source had no rels part.
            os.makedirs(os.path.dirname(self.rels_path), exist_ok=True)
            with open(self.rels_path, 'wb') as f:
                f.write(etree.tostring(relationships, pretty_print=True, encoding='UTF-8', xml_declaration=True))

    def _repack_docx(self, output_path):
        """Zip the working directory into ``output_path`` (or the source path)."""
        output_path = output_path or self.docx_path
        with ZipFile(output_path, 'w', ZIP_DEFLATED) as z:
            for root, dirs, files in os.walk(self.temp_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, self.temp_dir)
                    z.write(file_path, arcname)

    def _cleanup(self):
        """Delete the working directory if it exists."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)

以上代码定义了一个用于在Word文档的书签位置插入图片的类（运行前需要先导入 os、shutil、uuid，zipfile 中的 ZipFile 和 ZIP_DEFLATED，以及 lxml 的 etree 和 Pillow 的 Image），主要功能包括：

解压并解析Word文档的主文档和关系文件

处理所有书签位置，插入指定图片

保存修改后的文件并重新打包为docx文档，最后清理临时文件

示例使用方式如下：

# Example run: fill the two ID-card bookmarks of a template document and
# write the result to a separate file, leaving the template untouched.
docx_path = r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\word\附件4_法定代表人身份证明书.docx"
output_path = "output.docx"
# Bookmark name -> image file to place at that bookmark.
image_dict = dict(
    ID_CARD_FRONT=r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\png\正面.png",
    ID_CARD_BACK=r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\png\背面.png",
)
inserter = DocxImageInserter(docx_path)
inserter.insert_images_at_bookmarks(image_dict, output_path=output_path)

该代码可以根据实际需求进行扩展和修改，适用于在Word文档中自动化处理图片插入任务。

转载地址：http://zlqfk.baihongyu.com/

你可能感兴趣的文章

Objective-C实现一分钟倒计时(附完整源码)

查看>>

Objective-C实现三次样条曲线（附完整源码）

查看>>

Objective-C实现上传文件到FTP服务器(附完整源码)

查看>>

Objective-C实现两数之和问题(附完整源码)

查看>>

Objective-C实现串口通讯(附完整源码)