本文共 9129 字,大约阅读时间需要 30 分钟。
class DocxImageInserter:
    """Insert images at named bookmarks of a .docx file by editing its XML parts.

    The document is unzipped into ``self.temp_dir``, ``word/document.xml`` and
    ``word/_rels/document.xml.rels`` are modified, and the directory is zipped
    back into a .docx file.

    The class relies on these module-level names: ``os``, ``shutil``, ``uuid``,
    ``ZipFile``/``ZIP_DEFLATED`` (zipfile), ``etree`` (lxml) and ``Image``
    (Pillow).  NOTE(review): the corresponding import lines are not visible in
    this excerpt - confirm that they exist at the top of the file.
    """

    # Namespace of elements inside ``*.rels`` parts.
    PACKAGE_RELS_NS = 'http://schemas.openxmlformats.org/package/2006/relationships'
    # Namespace of elements inside ``[Content_Types].xml``.
    CONTENT_TYPES_NS = 'http://schemas.openxmlformats.org/package/2006/content-types'
    # Relationship type that links a document part to an embedded image.
    IMAGE_REL_TYPE = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
    # Bounding box (in pixels) that an inserted image is scaled to fit into.
    MAX_WIDTH_PX = 200
    MAX_HEIGHT_PX = 100

    def __init__(self, docx_path):
        """Remember the source document and derive the working paths.

        Args:
            docx_path: Path of the .docx file to read (and to overwrite when
                no output path is given later).
        """
        self.docx_path = docx_path
        self.temp_dir = 'temp_docx'
        self.namespaces = {
            'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
            'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
            'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
            'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
            'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
        }
        self.media_dir = os.path.join(self.temp_dir, 'word/media')
        self.rels_path = os.path.join(self.temp_dir, 'word/_rels/document.xml.rels')
        self.document_path = os.path.join(self.temp_dir, 'word/document.xml')
        self.next_rid = 1

    def _qn(self, name):
        """Turn ``'prefix:local'`` into the Clark notation ``'{uri}local'``.

        Raises:
            KeyError: If the prefix is not a key of ``self.namespaces``.
        """
        prefix, _, local = name.partition(':')
        return f'{{{self.namespaces[prefix]}}}{local}'

    def insert_images_at_bookmarks(self, image_dict, output_path=None):
        """Insert one image per matching bookmark and write the result.

        Args:
            image_dict: Mapping of bookmark name to image file path.
            output_path: Destination .docx path; when falsy the source
                document is overwritten.
        """
        try:
            self._prepare_working_directory()
            tree, relationships = self._parse_document_files()
            self._process_bookmarks(tree, relationships, image_dict)
            self._save_modified_files(tree, relationships)
            self._repack_docx(output_path)
            print(f"图片已成功插入到书签位置,保存到: {output_path or self.docx_path}")
        finally:
            # The extracted copy is removed whether or not the run succeeded.
            self._cleanup()

    def _prepare_working_directory(self):
        """Extract the source document into a fresh ``self.temp_dir``."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
        os.makedirs(self.temp_dir)
        with ZipFile(self.docx_path) as z:
            z.extractall(self.temp_dir)
        os.makedirs(self.media_dir, exist_ok=True)

    def _parse_document_files(self):
        """Parse the main document part and its relationships part.

        Also sets ``self.next_rid`` to one more than the highest numeric
        ``rIdN`` already present, so new relationship ids do not collide.

        Returns:
            A ``(tree, relationships)`` tuple holding the root element of
            ``document.xml`` and the root ``Relationships`` element.
        """
        parser = etree.XMLParser(remove_blank_text=True)
        with open(self.document_path, 'rb') as fh:
            tree = etree.fromstring(fh.read(), parser)

        if os.path.exists(self.rels_path):
            rels_parser = etree.XMLParser(remove_blank_text=True)
            relationships = etree.parse(self.rels_path, rels_parser).getroot()
        else:
            # Create the root inside the package-relationships namespace; a
            # plain ``xmlns`` attribute would leave the element un-namespaced.
            relationships = etree.Element(
                f'{{{self.PACKAGE_RELS_NS}}}Relationships',
                nsmap={None: self.PACKAGE_RELS_NS},
            )

        existing_rids = []
        for rel in relationships:
            rid = rel.get('Id') or ''
            if rid.startswith('rId') and rid[3:].isdecimal():
                existing_rids.append(int(rid[3:]))
        self.next_rid = max(existing_rids, default=0) + 1
        return tree, relationships

    def _process_bookmarks(self, tree, relationships, image_dict):
        """Insert an image at every bookmark whose name is a key of *image_dict*.

        Bookmarks whose mapped path is not an existing file are skipped.
        """
        name_attr = self._qn('w:name')
        for bookmark_start in tree.xpath('//w:bookmarkStart', namespaces=self.namespaces):
            bookmark_name = bookmark_start.get(name_attr)
            if bookmark_name not in image_dict:
                continue
            image_path = image_dict[bookmark_name]
            if os.path.isfile(image_path):
                self._delete_text_before_bookmark(tree, bookmark_start)
                self._insert_image_at_bookmark(tree, relationships, bookmark_start, image_path)

    def _delete_text_before_bookmark(self, tree, bookmark_start):
        """Remove text runs around a bookmark, stopping at any ``w:bookmarkEnd``.

        Two ranges are scanned: the siblings preceding the bookmark's parent
        element, and the siblings following ``bookmark_start`` itself.  In
        each range every ``w:r`` that contains a ``w:t`` is removed.

        NOTE(review): the first range walks siblings of the *parent*, as the
        original code did - confirm that this is the intended range.
        """
        parent = bookmark_start.getparent()
        if parent is None:
            return

        end_tag = self._qn('w:bookmarkEnd')
        run_tag = self._qn('w:r')
        doomed = []
        for siblings in (parent.itersiblings(preceding=True), bookmark_start.itersiblings()):
            for element in siblings:
                if element.tag == end_tag:
                    break
                if element.tag == run_tag and element.xpath('.//w:t', namespaces=self.namespaces):
                    doomed.append(element)

        # Remove after the scan, and from each run's real parent: runs found
        # in the first range are not children of ``parent``.
        for run in doomed:
            owner = run.getparent()
            if owner is not None:
                owner.remove(run)

    def _insert_image_at_bookmark(self, tree, relationships, bookmark_start, image_path):
        """Copy the image into the package and place it right after the bookmark.

        The picture is wrapped in a ``w:r`` run (and in a ``w:p`` paragraph
        when the bookmark sits directly in a block-level container), then the
        relationship and the content type for the image are registered.
        """
        extension = os.path.splitext(image_path)[1]
        image_name = f'image_{uuid.uuid4().hex}{extension}'
        rel_id = f'rId{self.next_rid}'
        self.next_rid += 1

        shutil.copy(image_path, os.path.join(self.media_dir, image_name))
        self._ensure_image_content_type(extension)

        # A ``w:drawing`` is only valid inside a run.
        run = etree.Element(self._qn('w:r'))
        run.append(self._create_image_xml(rel_id, image_name, image_path))

        parent = bookmark_start.getparent()
        if parent is None:
            body = tree.xpath('//w:body', namespaces=self.namespaces)[0]
            paragraph = etree.Element(self._qn('w:p'))
            paragraph.append(run)
            last = body[-1] if len(body) else None
            if last is not None and last.tag == self._qn('w:sectPr'):
                # Section properties must stay the last child of the body.
                last.addprevious(paragraph)
            else:
                body.append(paragraph)
        elif parent.tag in (self._qn('w:body'), self._qn('w:tc')):
            # Block-level containers hold paragraphs, not bare runs.
            paragraph = etree.Element(self._qn('w:p'))
            paragraph.append(run)
            parent.insert(parent.index(bookmark_start) + 1, paragraph)
        else:
            parent.insert(parent.index(bookmark_start) + 1, run)

        self._add_image_relationship(relationships, rel_id, f'media/{image_name}')

    def _ensure_image_content_type(self, extension):
        """Declare *extension* in ``[Content_Types].xml`` if it is missing.

        Does nothing when the extension is empty or the part does not exist.
        """
        import mimetypes

        ext = extension.lstrip('.').lower()
        types_path = os.path.join(self.temp_dir, '[Content_Types].xml')
        if not ext or not os.path.isfile(types_path):
            return

        types_tree = etree.parse(types_path)
        root = types_tree.getroot()
        default_tag = f'{{{self.CONTENT_TYPES_NS}}}Default'
        for node in root.iter(default_tag):
            if (node.get('Extension') or '').lower() == ext:
                return

        content_type = mimetypes.guess_type(f'file.{ext}')[0] or f'image/{ext}'
        entry = etree.Element(default_tag, {'Extension': ext, 'ContentType': content_type})
        root.insert(0, entry)
        types_tree.write(types_path, encoding='UTF-8', xml_declaration=True, standalone=True)

    def _create_image_xml(self, rel_id, image_name, image_path):
        """Build the inline ``w:drawing`` element for one picture.

        The image is scaled, keeping its aspect ratio, to fit a box of
        ``MAX_WIDTH_PX`` x ``MAX_HEIGHT_PX`` pixels.

        Args:
            rel_id: Relationship id that the picture's ``a:blip`` refers to.
            image_name: Name stored in the drawing's name attributes.
            image_path: Image file whose pixel size is read.

        Returns:
            The ``w:drawing`` element.
        """
        with Image.open(image_path) as img:
            width_px, height_px = img.size

        scale_ratio = min(self.MAX_WIDTH_PX / width_px, self.MAX_HEIGHT_PX / height_px)
        emu_width, emu_height = self._convert_pixels_to_emu(
            width_px * scale_ratio, height_px * scale_ratio
        )
        size = {'cx': str(emu_width), 'cy': str(emu_height)}
        qn = self._qn

        drawing = etree.Element(qn('w:drawing'), nsmap=self.namespaces)
        inline = etree.SubElement(
            drawing, qn('wp:inline'),
            {'distT': "0", 'distB': "0", 'distL': "0", 'distR': "0"},
        )
        etree.SubElement(inline, qn('wp:extent'), size)
        etree.SubElement(inline, qn('wp:effectExtent'), {'l': "0", 't': "0", 'r': "0", 'b': "0"})
        etree.SubElement(inline, qn('wp:docPr'), {'id': "1", 'name': image_name, 'descr': ""})
        frame_pr = etree.SubElement(inline, qn('wp:cNvGraphicFramePr'))
        etree.SubElement(frame_pr, qn('a:graphicFrameLocks'), {'noChangeAspect': "1"})

        graphic = etree.SubElement(inline, qn('a:graphic'))
        graphic_data = etree.SubElement(
            graphic, qn('a:graphicData'),
            {'uri': "http://schemas.openxmlformats.org/drawingml/2006/picture"},
        )
        pic = etree.SubElement(graphic_data, qn('pic:pic'))

        nv_pic_pr = etree.SubElement(pic, qn('pic:nvPicPr'))
        etree.SubElement(nv_pic_pr, qn('pic:cNvPr'), {'id': "0", 'name': image_name})
        etree.SubElement(nv_pic_pr, qn('pic:cNvPicPr'))

        blip_fill = etree.SubElement(pic, qn('pic:blipFill'))
        # The embed attribute lives in the relationships namespace (r:embed).
        etree.SubElement(blip_fill, qn('a:blip'), {qn('r:embed'): rel_id})
        stretch = etree.SubElement(blip_fill, qn('a:stretch'))
        etree.SubElement(stretch, qn('a:fillRect'))

        sp_pr = etree.SubElement(pic, qn('pic:spPr'))
        xfrm = etree.SubElement(sp_pr, qn('a:xfrm'))
        etree.SubElement(xfrm, qn('a:off'), {'x': "0", 'y': "0"})
        etree.SubElement(xfrm, qn('a:ext'), size)
        geometry = etree.SubElement(sp_pr, qn('a:prstGeom'), {'prst': "rect"})
        # The adjust-value list is a child of the preset geometry.
        etree.SubElement(geometry, qn('a:avLst'))
        return drawing

    def _convert_pixels_to_emu(self, width_px, height_px, dpi=96):
        """Convert a pixel size to English Metric Units (914400 EMU per inch).

        Returns:
            An ``(emu_width, emu_height)`` tuple of ints.
        """
        emu_per_px = 914400 / dpi
        return int(round(width_px * emu_per_px)), int(round(height_px * emu_per_px))

    def _add_image_relationship(self, relationships, rel_id, target):
        """Append an image ``Relationship`` entry to the relationships root."""
        # Use the root's own namespace so the new entry matches its siblings.
        namespace = etree.QName(relationships).namespace
        tag = f'{{{namespace}}}Relationship' if namespace else 'Relationship'
        etree.SubElement(relationships, tag, {
            'Id': rel_id,
            'Type': self.IMAGE_REL_TYPE,
            'Target': target,
        })

    def _save_modified_files(self, tree, relationships):
        """Write the document part and, if non-empty, the relationships part."""
        with open(self.document_path, 'wb') as f:
            f.write(etree.tostring(tree, pretty_print=True, encoding='UTF-8', xml_declaration=True))
        if len(relationships) > 0:
            # The ``_rels`` folder is absent when the source had no rels part.
            os.makedirs(os.path.dirname(self.rels_path), exist_ok=True)
            with open(self.rels_path, 'wb') as f:
                f.write(etree.tostring(relationships, pretty_print=True, encoding='UTF-8', xml_declaration=True))

    def _repack_docx(self, output_path):
        """Zip the working directory into *output_path* (or the source path)."""
        output_path = output_path or self.docx_path
        with ZipFile(output_path, 'w', ZIP_DEFLATED) as z:
            for root, dirs, files in os.walk(self.temp_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, self.temp_dir)
                    z.write(file_path, arcname)

    def _cleanup(self):
        """Delete the working directory if it exists."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)


# The code above defines a class that inserts images at bookmark positions in
# a Word document. Its main features include:
示例使用方式如下:
# Example usage: insert the front and back ID-card images at their bookmarks
# and save the result as a new document.
docx_path = r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\word\附件4_法定代表人身份证明书.docx"
output_path = "output.docx"
# Maps each bookmark name in the document to the image placed there.
image_dict = {
    "ID_CARD_FRONT": r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\png\正面.png",
    "ID_CARD_BACK": r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\png\背面.png",
}
inserter = DocxImageInserter(docx_path)
inserter.insert_images_at_bookmarks(image_dict, output_path)

# This code can be extended and modified as needed; it is suited to automating
# image insertion in Word documents.
转载地址:http://zlqfk.baihongyu.com/