由於工作中需要從大量docx文檔中提取圖片,於是到網上搜索,找到的一大堆方法都是分析xml文件並提取的,太過於複雜。實際上有更簡單的方法:我們需要用到python-docx這個第三方庫,雖然該庫並未開發提取圖片的功能,不過可以通過其他方法得到圖片數據並保存為圖片。
本文為原創,如需轉載請注明出處(僅處理docx文檔,不能處理doc文檔,如果需要可執行文件的,可將代碼文件通過pyinstaller進行編譯)。
軟件界面
下面這段代碼是核心:
# Core extraction loop: walk every .docx file in filePath and write its
# embedded images into a per-document sub-folder of imgPath.
for file in os.listdir(filePath):
    # Skip anything that is not a .docx document, and the "~$name.docx"
    # lock files Word leaves next to a document that is currently open.
    if not file.lower().endswith(".docx") or file.startswith("~$"):
        continue
    try:
        # One output folder per document, named after the file without its
        # extension. os.path.join works whether or not imgPath/filePath end
        # with a separator.
        subImgPath = os.path.join(imgPath, os.path.splitext(file)[0])
        os.makedirs(subImgPath, exist_ok=True)
        doc = docx.Document(os.path.join(filePath, file))  # open the document
        for rel in doc.part.rels.values():
            # Select by relationship type instead of searching the target
            # path for "image": a hyperlink whose URL merely contains
            # "image" is external, has no target part, and used to abort
            # the whole document. Linked (external) pictures carry no blob.
            if rel.is_external or not rel.reltype.endswith("/image"):
                continue
            # Keep only the final path component so the file always lands
            # directly inside subImgPath.
            imgName = os.path.basename(rel.target_ref)
            with open(os.path.join(subImgPath, imgName), "wb") as f:
                f.write(rel.target_part.blob)
            UI.currentFile.setText("當前文件:" + imgName)
    except Exception:
        # Best effort: a corrupt or unreadable document must not stop the
        # batch. KeyboardInterrupt/SystemExit are no longer swallowed.
        continue
后來經過改進,使用PyQt5制作了界面,下面為源代碼:
import docx,re,os,sys,ui_imgExtract
from PyQt5.QtWidgets import QApplication,QMainWindow,QWidget,QMessageBox
from PyQt5.Qt import QFileDialog
def run():
    """Extract the images of every .docx file in the selected directory.

    Reads the source and output directories from the form, creates one
    sub-folder per document inside the output directory and writes each
    embedded image there under its original name. Documents that cannot be
    processed are skipped and listed in the completion dialog.
    """
    filePath = UI.filePath.text()
    imgPath = UI.imgPath.text()
    # The source must be a directory: os.listdir() below fails on a file.
    if not os.path.isdir(filePath):
        QMessageBox.about(main, "錯誤", "請選擇docx文件目錄!")
        return
    try:
        os.makedirs(imgPath, exist_ok=True)
    except OSError:
        # Empty or invalid output path: report it instead of letting the
        # exception escape from the button's slot.
        QMessageBox.about(main, "錯誤", "無法創建圖片保存目錄!")
        return

    skipped = []  # documents that raised while being processed
    for file in os.listdir(filePath):
        # Skip anything that is not a .docx document, and the "~$name.docx"
        # lock files Word leaves next to a document that is currently open.
        if not file.lower().endswith(".docx") or file.startswith("~$"):
            continue
        try:
            # One output folder per document, named after the file without
            # its extension.
            subImgPath = os.path.join(imgPath, os.path.splitext(file)[0])
            os.makedirs(subImgPath, exist_ok=True)
            doc = docx.Document(os.path.join(filePath, file))  # open the document
            for rel in doc.part.rels.values():
                # Select by relationship type instead of searching the
                # target path for "image": an external hyperlink whose URL
                # contains "image" has no target part and used to abort the
                # whole document. Linked (external) pictures carry no blob.
                if rel.is_external or not rel.reltype.endswith("/image"):
                    continue
                # Keep only the final path component so the file always
                # lands directly inside subImgPath.
                imgName = os.path.basename(rel.target_ref)
                with open(os.path.join(subImgPath, imgName), "wb") as f:
                    f.write(rel.target_part.blob)
                UI.currentFile.setText("當前文件:" + imgName)
        except Exception:
            # Best effort: one bad document must not stop the batch, but the
            # user is told about it at the end.
            skipped.append(file)

    message = "圖片提取已完成!"
    if skipped:
        message += "\n以下文件處理失敗,已跳過:\n" + "\n".join(skipped)
    QMessageBox.about(main, "完成", message)
def init():
    """Connect the buttons to their handlers and fill in default directories."""
    UI.btnRun.clicked.connect(run)  # "start extraction" button
    UI.btnFilePath.clicked.connect(choiceFileDir)  # choose the docx directory
    UI.btnImgPath.clicked.connect(choiceImgOutPutDir)  # choose the image output directory

    # Default docx directory: the current working directory.
    # os.path.join(path, "") appends the trailing separator, so the value can
    # be prefixed directly onto a file name.
    UI.filePath.setText(os.path.join(os.getcwd(), ""))

    # Default output directory: an "img" folder under the working directory.
    # The previous literal "img\" was an unterminated string (syntax error).
    defaultImgDir = os.path.join(os.getcwd(), "img")
    try:
        os.makedirs(defaultImgDir, exist_ok=True)
    except OSError:
        # Not fatal at start-up (e.g. read-only working directory); the user
        # can still pick another output directory in the form.
        pass
    UI.imgPath.setText(os.path.join(defaultImgDir, ""))
# Choose the directory that contains the docx files.
def choiceFileDir():
    """Ask for a directory and store it in the docx-directory field.

    The field is left unchanged when the dialog is cancelled.
    """
    chosenDir = QFileDialog.getExistingDirectory(main, "選擇docx文件目錄", os.getcwd())
    if not chosenDir:
        # Cancelled: an empty string came back. Appending "/" to it would
        # silently point the field at the filesystem root.
        return
    # Keep a single trailing slash so the value can be prefixed directly
    # onto a file name.
    if not chosenDir.endswith("/"):
        chosenDir += "/"
    UI.filePath.setText(chosenDir)
# Choose the directory the extracted images are saved to.
def choiceImgOutPutDir():
    """Ask for a directory and store it in the image-output field.

    The field is left unchanged when the dialog is cancelled.
    """
    chosenDir = QFileDialog.getExistingDirectory(main, "選擇輸出目錄", os.getcwd())
    if not chosenDir:
        # Cancelled: an empty string came back. Appending "/" to it would
        # silently point the field at the filesystem root.
        return
    # Keep a single trailing slash so the value can be prefixed directly
    # onto a sub-folder name.
    if not chosenDir.endswith("/"):
        chosenDir += "/"
    UI.imgPath.setText(chosenDir)
if __name__ == "__main__":
    # Build the application object and the top-level window. `main` and `UI`
    # are module-level names that run(), init() and the directory choosers
    # read, so they must keep these exact names.
    app = QApplication(sys.argv)
    main = QWidget()
    UI = ui_imgExtract.Ui_Form()
    UI.setupUi(main)
    main.show()
    init()
    # Run the Qt event loop and use its return code as the exit status.
    exitCode = app.exec_()
    sys.exit(exitCode)
下面是界面文件ui_imgExtract.py:
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'ui_iask.ui'
#
# Created by: PyQt5 UI code generator 5.11.3
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_Form(object):
    """Widget set-up for the docx image batch-extraction form."""

    def setupUi(self, Form):
        """Create the child widgets of *Form* and arrange them in a grid."""
        Form.setObjectName("Form")
        Form.resize(604, 100)

        # Container widget that hosts the grid layout.
        self.layoutWidget = QtWidgets.QWidget(Form)
        container = self.layoutWidget
        container.setGeometry(QtCore.QRect(10, 10, 581, 83))
        container.setObjectName("layoutWidget")

        self.gridLayout_4 = QtWidgets.QGridLayout(container)
        grid = self.gridLayout_4
        grid.setContentsMargins(0, 0, 0, 0)
        grid.setObjectName("gridLayout_4")

        # Row 0: label, line edit and button for the docx directory.
        self.label_8 = QtWidgets.QLabel(container)
        self.label_8.setObjectName("label_8")
        grid.addWidget(self.label_8, 0, 0, 1, 1)

        self.filePath = QtWidgets.QLineEdit(container)
        self.filePath.setObjectName("filePath")
        grid.addWidget(self.filePath, 0, 1, 1, 1)

        self.btnFilePath = QtWidgets.QPushButton(container)
        self.btnFilePath.setObjectName("btnFilePath")
        grid.addWidget(self.btnFilePath, 0, 2, 1, 1)

        # Row 1: label, line edit and button for the image output directory.
        self.label_9 = QtWidgets.QLabel(container)
        self.label_9.setObjectName("label_9")
        grid.addWidget(self.label_9, 1, 0, 1, 1)

        self.imgPath = QtWidgets.QLineEdit(container)
        self.imgPath.setObjectName("imgPath")
        grid.addWidget(self.imgPath, 1, 1, 1, 1)

        self.btnImgPath = QtWidgets.QPushButton(container)
        self.btnImgPath.setObjectName("btnImgPath")
        grid.addWidget(self.btnImgPath, 1, 2, 1, 1)

        # Row 2: run button in the last column, status label spanning the
        # first two columns.
        self.btnRun = QtWidgets.QPushButton(container)
        self.btnRun.setObjectName("btnRun")
        grid.addWidget(self.btnRun, 2, 2, 1, 1)

        self.currentFile = QtWidgets.QLabel(container)
        self.currentFile.setObjectName("currentFile")
        grid.addWidget(self.currentFile, 2, 0, 1, 2)

        self.retranslateUi(Form)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set the window title and the text of every label and button."""
        tr = QtCore.QCoreApplication.translate
        Form.setWindowTitle(tr("Form", "docx圖片批量提取"))
        self.label_8.setText(tr("Form", "docx文件目錄:"))
        self.btnFilePath.setText(tr("Form", "選擇"))
        self.label_9.setText(tr("Form", "圖片保存目錄:"))
        self.btnImgPath.setText(tr("Form", "選擇"))
        self.btnRun.setText(tr("Form", "開始提取"))
        self.currentFile.setText(tr("Form", "當前文件:"))
版權聲明:本文內容由互聯網用戶自發貢獻,該文觀點僅代表作者本人。本站僅提供信息存儲空間服務,不擁有所有權,不承擔相關法律責任。如發現本站有涉嫌抄襲侵權/違法違規的內容, 請發送郵件至 舉報,一經查實,本站將立刻刪除。
發表評論
請登錄后評論...
登錄后才能評論