博客
关于我
09-smart_bookmark_filler.py
阅读量:797 次
发布时间:2023-03-23

本文共 9129 字,大约阅读时间需要 30 分钟。

class DocxImageInserter:
    """Insert pictures at named bookmarks of a .docx file by editing its XML.

    The document is unzipped into ``temp_dir``, ``word/document.xml`` and
    ``word/_rels/document.xml.rels`` are modified, the image files are copied
    into ``word/media`` and the directory is zipped back into a .docx.

    The methods use the module-level names ``os``, ``shutil``, ``uuid``,
    ``ZipFile``, ``ZIP_DEFLATED``, ``etree`` and ``Image``; they must be
    imported by the surrounding module (``etree``/``Image`` are presumably
    ``lxml.etree`` and ``PIL.Image`` -- confirm against the real file header).
    """

    # Bounding box (in pixels) every inserted picture is scaled to fit,
    # keeping its aspect ratio.
    MAX_IMAGE_WIDTH_PX = 200
    MAX_IMAGE_HEIGHT_PX = 100

    RELATIONSHIPS_NS = 'http://schemas.openxmlformats.org/package/2006/relationships'
    CONTENT_TYPES_NS = 'http://schemas.openxmlformats.org/package/2006/content-types'
    IMAGE_REL_TYPE = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'

    # File extension (lower case, no dot) -> MIME type declared in
    # [Content_Types].xml when the package does not know the extension yet.
    IMAGE_CONTENT_TYPES = {
        'png': 'image/png',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'bmp': 'image/bmp',
        'tif': 'image/tiff',
        'tiff': 'image/tiff',
    }

    def __init__(self, docx_path):
        """Remember the source document and derive the working paths.

        Args:
            docx_path: Path of the .docx file to read (and to overwrite when
                no explicit output path is given later).
        """
        self.docx_path = docx_path
        self.temp_dir = 'temp_docx'
        self.namespaces = {
            'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
            'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
            'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
            'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
            'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
        }
        self.media_dir = os.path.join(self.temp_dir, 'word/media')
        self.rels_path = os.path.join(self.temp_dir, 'word/_rels/document.xml.rels')
        self.document_path = os.path.join(self.temp_dir, 'word/document.xml')
        self.content_types_path = os.path.join(self.temp_dir, '[Content_Types].xml')
        self.next_rid = 1

    def insert_images_at_bookmarks(self, image_dict, output_path=None):
        """Insert one image per matching bookmark and write the result.

        Args:
            image_dict: Mapping of bookmark name -> image file path.
            output_path: Destination .docx; when falsy the source document
                is overwritten.
        """
        try:
            self._prepare_working_directory()
            tree, relationships = self._parse_document_files()
            self._process_bookmarks(tree, relationships, image_dict)
            self._save_modified_files(tree, relationships)
            self._repack_docx(output_path)
            print(f"图片已成功插入到书签位置,保存到: {output_path or self.docx_path}")
        finally:
            # The extracted copy is removed whether or not the run succeeded.
            self._cleanup()

    def _qualified_name(self, prefix, local_name):
        """Return ``{namespace-uri}local_name`` (Clark notation) for *prefix*."""
        return f'{{{self.namespaces[prefix]}}}{local_name}'

    def _prepare_working_directory(self):
        """Extract the document into a fresh ``temp_dir``."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
        os.makedirs(self.temp_dir)
        with ZipFile(self.docx_path) as z:
            z.extractall(self.temp_dir)
        os.makedirs(self.media_dir, exist_ok=True)

    def _parse_document_files(self):
        """Parse the main document and its relationships part.

        Also advances ``self.next_rid`` past the highest existing ``rIdN``.

        Returns:
            ``(document_root, relationships_root)`` as etree elements.
        """
        parser = etree.XMLParser(remove_blank_text=True)
        with open(self.document_path, 'rb') as f:
            tree = etree.fromstring(f.read(), parser)

        if os.path.exists(self.rels_path):
            rels_parser = etree.XMLParser(remove_blank_text=True)
            relationships = etree.parse(self.rels_path, rels_parser).getroot()
        else:
            # Create the root inside the package-relationships namespace
            # (declared as the default namespace) instead of faking it with
            # a literal ``xmlns`` attribute.
            relationships = etree.Element(
                f'{{{self.RELATIONSHIPS_NS}}}Relationships',
                nsmap={None: self.RELATIONSHIPS_NS},
            )

        # ``Id`` is an un-namespaced attribute, so no prefix map is needed.
        existing_rids = [
            int(rid[3:])
            for rid in relationships.xpath('//@Id')
            if rid.startswith('rId') and rid[3:].isdigit()
        ]
        self.next_rid = max(existing_rids) + 1 if existing_rids else 1
        return tree, relationships

    def _process_bookmarks(self, tree, relationships, image_dict):
        """Insert an image at every bookmark whose name is in *image_dict*.

        Bookmarks without an entry, and entries whose path is not an
        existing file, are skipped silently.
        """
        name_attr = self._qualified_name('w', 'name')
        bookmark_starts = tree.xpath('//w:bookmarkStart', namespaces=self.namespaces)
        for bookmark_start in bookmark_starts:
            bookmark_name = bookmark_start.get(name_attr)
            if bookmark_name not in image_dict:
                continue
            image_path = image_dict[bookmark_name]
            if os.path.isfile(image_path):
                self._delete_text_before_bookmark(tree, bookmark_start)
                self._insert_image_at_bookmark(tree, relationships, bookmark_start, image_path)

    def _text_runs_until_bookmark_end(self, elements):
        """Collect ``w:r`` elements containing ``w:t`` up to the first ``w:bookmarkEnd``."""
        run_tag = self._qualified_name('w', 'r')
        end_tag = self._qualified_name('w', 'bookmarkEnd')
        runs = []
        for element in elements:
            # Comments / processing instructions have a non-string tag and
            # simply fail both comparisons.
            if element.tag == end_tag:
                break
            if element.tag == run_tag and element.xpath('.//w:t', namespaces=self.namespaces):
                runs.append(element)
        return runs

    def _delete_text_before_bookmark(self, tree, bookmark_start):
        """Remove text runs around the bookmark before the image is inserted.

        Two scans are made, each stopping at the first ``w:bookmarkEnd``:
        backwards over the siblings preceding the bookmark's parent, and
        forwards over the siblings following the bookmark itself.

        NOTE(review): the backward scan walks the siblings of the bookmark's
        *parent*, exactly as the original code did; confirm whether the
        siblings of the bookmark itself were intended.
        """
        parent = bookmark_start.getparent()
        if parent is None:
            return
        doomed = self._text_runs_until_bookmark_end(parent.itersiblings(preceding=True))
        doomed += self._text_runs_until_bookmark_end(bookmark_start.itersiblings())
        for run in doomed:
            # Remove each run from its real container; the runs found by the
            # backward scan are not children of ``parent``.
            container = run.getparent()
            if container is not None:
                container.remove(run)

    def _next_doc_pr_id(self, tree):
        """Return an integer one above the highest numeric ``wp:docPr`` id in *tree*."""
        used = [
            int(value)
            for value in tree.xpath('//wp:docPr/@id', namespaces=self.namespaces)
            if value.isdigit()
        ]
        return max(used, default=0) + 1

    def _ensure_image_content_type(self, extension):
        """Declare *extension* in ``[Content_Types].xml`` if it is missing.

        Does nothing when the part does not exist, the extension is empty
        or unknown, or a ``Default`` entry for it is already present.
        """
        ext = extension.lstrip('.').lower()
        content_type = self.IMAGE_CONTENT_TYPES.get(ext)
        if content_type is None or not os.path.exists(self.content_types_path):
            return
        root = etree.parse(self.content_types_path).getroot()
        default_tag = f'{{{self.CONTENT_TYPES_NS}}}Default'
        for default in root.iter(default_tag):
            if (default.get('Extension') or '').lower() == ext:
                return
        etree.SubElement(root, default_tag, {'Extension': ext, 'ContentType': content_type})
        with open(self.content_types_path, 'wb') as f:
            f.write(etree.tostring(root, encoding='UTF-8', xml_declaration=True))

    def _insert_image_at_bookmark(self, tree, relationships, bookmark_start, image_path):
        """Copy the image into the package and place it right after the bookmark."""
        extension = os.path.splitext(image_path)[1]
        image_name = f'image_{uuid.uuid4().hex}{extension}'
        rel_id = f'rId{self.next_rid}'
        self.next_rid += 1
        shutil.copy(image_path, os.path.join(self.media_dir, image_name))
        self._ensure_image_content_type(extension)

        drawing = self._create_image_xml(
            rel_id, image_name, image_path, doc_pr_id=self._next_doc_pr_id(tree)
        )
        # A drawing is run content: wrap it in w:r rather than inserting the
        # bare w:drawing element next to the bookmark.
        run = etree.Element(self._qualified_name('w', 'r'))
        run.append(drawing)

        parent = bookmark_start.getparent()
        block_level = {self._qualified_name('w', 'body'), self._qualified_name('w', 'tc')}
        if parent is None:
            paragraph = etree.Element(self._qualified_name('w', 'p'))
            paragraph.append(run)
            body = tree.xpath('//w:body', namespaces=self.namespaces)[0]
            body.append(paragraph)
        elif parent.tag in block_level:
            # Bookmark sits between paragraphs: the run needs its own w:p.
            paragraph = etree.Element(self._qualified_name('w', 'p'))
            paragraph.append(run)
            parent.insert(parent.index(bookmark_start) + 1, paragraph)
        else:
            parent.insert(parent.index(bookmark_start) + 1, run)
        self._add_image_relationship(relationships, rel_id, f'media/{image_name}')

    def _fit_to_box(self, width_px, height_px):
        """Scale a pixel size to fit the MAX_IMAGE_* box, keeping aspect ratio.

        The original computed this ratio but never applied it.

        Returns:
            ``(width, height)`` as floats.
        """
        scale_ratio = min(
            self.MAX_IMAGE_WIDTH_PX / width_px,
            self.MAX_IMAGE_HEIGHT_PX / height_px,
        )
        return width_px * scale_ratio, height_px * scale_ratio

    def _create_image_xml(self, rel_id, image_name, image_path, doc_pr_id=1):
        """Build the ``w:drawing`` element for an inline picture.

        Args:
            rel_id: Relationship id the picture's blip refers to.
            image_name: Name recorded on ``wp:docPr`` / ``pic:cNvPr``.
            image_path: Image file, opened only to read its pixel size.
            doc_pr_id: Value for the ``wp:docPr`` ``id`` attribute.

        Returns:
            The detached ``w:drawing`` element.
        """
        qn = self._qualified_name
        with Image.open(image_path) as img:
            width_px, height_px = img.size
        fitted_width, fitted_height = self._fit_to_box(width_px, height_px)
        emu_width, emu_height = self._convert_pixels_to_emu(fitted_width, fitted_height)
        size = {'cx': str(emu_width), 'cy': str(emu_height)}

        # Passing the prefix map keeps the familiar w:/wp:/a:/pic:/r: prefixes.
        drawing = etree.Element(qn('w', 'drawing'), nsmap=self.namespaces)
        inline = etree.SubElement(
            drawing, qn('wp', 'inline'),
            {'distT': "0", 'distB': "0", 'distL': "0", 'distR': "0"},
        )
        etree.SubElement(inline, qn('wp', 'extent'), size)
        etree.SubElement(inline, qn('wp', 'effectExtent'), {'l': "0", 't': "0", 'r': "0", 'b': "0"})
        etree.SubElement(inline, qn('wp', 'docPr'), {'id': str(doc_pr_id), 'name': image_name, 'descr': ""})
        frame_props = etree.SubElement(inline, qn('wp', 'cNvGraphicFramePr'))
        etree.SubElement(frame_props, qn('a', 'graphicFrameLocks'), {'noChangeAspect': "1"})

        graphic = etree.SubElement(inline, qn('a', 'graphic'))
        graphic_data = etree.SubElement(
            graphic, qn('a', 'graphicData'),
            {'uri': "http://schemas.openxmlformats.org/drawingml/2006/picture"},
        )
        pic = etree.SubElement(graphic_data, qn('pic', 'pic'))

        nv_pic_pr = etree.SubElement(pic, qn('pic', 'nvPicPr'))
        etree.SubElement(nv_pic_pr, qn('pic', 'cNvPr'), {'id': "0", 'name': image_name})
        etree.SubElement(nv_pic_pr, qn('pic', 'cNvPicPr'))

        blip_fill = etree.SubElement(pic, qn('pic', 'blipFill'))
        # The relationship reference must be the namespaced r:embed attribute.
        etree.SubElement(blip_fill, qn('a', 'blip'), {qn('r', 'embed'): rel_id})
        stretch = etree.SubElement(blip_fill, qn('a', 'stretch'))
        etree.SubElement(stretch, qn('a', 'fillRect'))

        sp_pr = etree.SubElement(pic, qn('pic', 'spPr'))
        xfrm = etree.SubElement(sp_pr, qn('a', 'xfrm'))
        etree.SubElement(xfrm, qn('a', 'off'), {'x': "0", 'y': "0"})
        etree.SubElement(xfrm, qn('a', 'ext'), size)
        # a:avLst is a child of a:prstGeom, not of pic:spPr.
        prst_geom = etree.SubElement(sp_pr, qn('a', 'prstGeom'), {'prst': "rect"})
        etree.SubElement(prst_geom, qn('a', 'avLst'))
        return drawing

    def _convert_pixels_to_emu(self, width_px, height_px, dpi=96):
        """Convert a pixel size to EMU (914400 EMU per inch) at *dpi*.

        Returns:
            ``(width_emu, height_emu)`` truncated to integers.
        """
        emu_per_px = 914400 / dpi
        return int(width_px * emu_per_px), int(height_px * emu_per_px)

    def _add_image_relationship(self, relationships, rel_id, target):
        """Append an image ``Relationship`` entry to *relationships*."""
        etree.SubElement(relationships, f'{{{self.RELATIONSHIPS_NS}}}Relationship', {
            'Id': rel_id,
            'Type': self.IMAGE_REL_TYPE,
            'Target': target
        })

    def _save_modified_files(self, tree, relationships):
        """Write the document, and the relationships part when it has entries."""
        with open(self.document_path, 'wb') as f:
            f.write(etree.tostring(tree, pretty_print=True, encoding='UTF-8', xml_declaration=True))
        if len(relationships) > 0:
            # The _rels directory is absent when the source had no rels part.
            os.makedirs(os.path.dirname(self.rels_path), exist_ok=True)
            with open(self.rels_path, 'wb') as f:
                f.write(etree.tostring(relationships, pretty_print=True, encoding='UTF-8', xml_declaration=True))

    def _repack_docx(self, output_path):
        """Zip the working directory into *output_path* (or the source path)."""
        output_path = output_path or self.docx_path
        with ZipFile(output_path, 'w', ZIP_DEFLATED) as z:
            for root, dirs, files in os.walk(self.temp_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, self.temp_dir)
                    z.write(file_path, arcname)

    def _cleanup(self):
        """Delete the working directory if it exists."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)

以上代码定义了一个用于在Word文档的书签位置插入图片的类,主要功能包括:

  • 解压并解析Word文档的主文档和关系文件
  • 处理所有书签位置,插入指定图片
  • 清理临时文件
示例使用方式如下:

    # Sample input/output locations; replace them with your own files.
    docx_path = r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\word\附件4_法定代表人身份证明书.docx"
    output_path = "output.docx"

    # Bookmark name -> path of the image to place at that bookmark.
    image_dict = dict(
        ID_CARD_FRONT=r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\png\正面.png",
        ID_CARD_BACK=r"C:\Leon\python_project\oceanxecm\2025\04\20250422-AI-投标书\png\背面.png",
    )

    inserter = DocxImageInserter(docx_path)
    inserter.insert_images_at_bookmarks(image_dict, output_path=output_path)

    该代码可以根据实际需求进行扩展和修改,适用于在Word文档中自动化处理图片插入任务。

    转载地址:http://zlqfk.baihongyu.com/

    你可能感兴趣的文章
    Objective-C实现一分钟倒计时(附完整源码)
    查看>>
    Objective-C实现三次样条曲线(附完整源码)
    查看>>
    Objective-C实现上传文件到FTP服务器(附完整源码)
    查看>>
    Objective-C实现两数之和问题(附完整源码)
    查看>>
    Objective-C实现串口通讯(附完整源码)
    查看>>
    Objective-C实现串逐位和(附完整源码)
    查看>>
    Objective-C实现主存储器空间的分配和回收(附完整源码)
    查看>>
    Objective-C实现乘方运算---m的n次方(附完整源码)
    查看>>
    Objective-C实现二叉树遍历算法(附完整源码)
    查看>>
    Objective-C实现二进制和算法(附完整源码)
    查看>>
    Objective-C实现二进制补码算法(附完整源码)
    查看>>
    Objective-C实现互斥锁同步执行两个线程函数(附完整源码)
    查看>>
    Objective-C实现交易密码算法(附完整源码)
    查看>>
    Objective-C实现低通滤波器(附完整源码)
    查看>>
    Objective-C实现使用管道重定向进程输入输出(附完整源码)
    查看>>
    Objective-C实现借记款项功能(附完整源码)
    查看>>
    Objective-C实现关系矩阵A和B的乘积(附完整源码)
    查看>>
    Objective-C实现内存映射文件(附完整源码)
    查看>>
    Objective-C实现内存泄露检查(附完整源码)
    查看>>
    Objective-C实现内格尔·施雷肯伯格算法(附完整源码)
    查看>>