工具箱相关
Du kannst nicht mehr als 25 Themen auswählen Themen müssen mit entweder einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

Compare1_latest.py 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623
  1. import datetime, shutil, os
  2. import time
  3. from sympy import *
  4. import uuid
  5. from sklearn.feature_extraction.text import CountVectorizer
  6. import numpy as np
  7. from win32com.client import Dispatch
  8. from multiprocessing import Process, Lock, Manager
  9. import multiprocessing
  10. from pyDes import *
  11. from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog, QMessageBox, QDialog, QVBoxLayout,QLabel,QPushButton
  12. from PyQt5 import QtCore
  13. from PyQt5.QtCore import Qt
  14. from MainForm import Ui_MainWindow
  15. import encryption
  16. import re
  17. # def getMachineCode():
  18. # m_wmi = wmi.WMI()
  19. # cpu_info = m_wmi.Win32_Processor()
  20. # serial_number = 0
  21. # if len(cpu_info) > 0:
  22. # serial_number = cpu_info[0].ProcessorId
  23. # serial_number = serial_number.encode("utf-8")
  24. # machine_code = hashlib.md5(serial_number).hexdigest()
  25. # tm = time.localtime()
  26. # date = '%04d' % tm.tm_year + '%02d' % tm.tm_mon + '%02d' % tm.tm_mday
  27. # machine_code = machine_code + date
  28. # return machine_code
  29. #
  30. # def Encrypted(code):
  31. # # 使用DES-CBC加密算法加密机器码
  32. # Des_key = "fda34hfk" # 自定义 Key
  33. # Des_IV = "\x11\2\x2a\3\1\x27\2\0" # IV向量
  34. # k = des(Des_key, CBC, Des_IV, pad=None, padmode=PAD_PKCS5)
  35. # EncryptStr = k.encrypt(code)
  36. # return EncryptStr
  37. #
  38. # def Decrypted(code):
  39. # # 使用DES-CBC加密算法加密机器码
  40. # Des_key = "fda34hfk" # 自定义 Key
  41. # Des_IV = "\x11\2\x2a\3\1\x27\2\0" # IV向量
  42. # k = des(Des_key, CBC, Des_IV, pad=None, padmode=PAD_PKCS5)
  43. # DecryptStr = k.decrypt(code)
  44. # return DecryptStr
  45. #
  46. # def checkCode():
  47. # # 获取机器码
  48. # machine_code = getMachineCode()
  49. # # 自己定义 Encrypted 函数进行加密处理
  50. # encrypt_code = Encrypted(machine_code.encode("utf-8"))
  51. #
  52. # # 读取本地的授权文件
  53. # if os.path.exists("C:/Users/{0}/AppData/Roaming/cpregister.bin".format(getpass.getuser())):
  54. # with open("C:/Users/{0}/AppData/Roaming/cpregister.bin".format(getpass.getuser()), "r") as f:
  55. # key_code = f.read()
  56. # decrypt_code = Decrypted(key_code)
  57. # cpucode = decrypt_code
  58. # # 如果机器码经过加密后的值,等于授权码的值,则验证通过,否则验证失败
  59. # if key_code == encrypt_code:
  60. # return True
  61. # else:
  62. # return False
  63. # else:
  64. # print('机器码:' + str(machine_code))
  65. # registercode = input('注册码:')
  66. # if registercode == encrypt_code:
  67. # with open("C:/Users/{0}/AppData/Roaming/cpregister.bin".format(getpass.getuser()), 'wb') as file:
  68. # string_bytes = registercode.encode('utf-8')
  69. # file.write(string_bytes)
  70. # print('验证成功!')
  71. # else:
  72. # print('注册码有误!')
  73. def dataGet(s0):
  74. aa = 0
  75. datekey = {
  76. 'ze':'0','im':'1','tw':'2','ee':'3','fr':'4','ve':'5',
  77. 'ix':'6','se':'7','ei':'8','ni':'9'
  78. }
  79. ii = 0
  80. dastr = []
  81. while ii < 8:
  82. x1 = 2 * ii
  83. y1 = x1 + 2
  84. das = datekey[s0[x1:y1]]
  85. dastr.append(das)
  86. ii = ii + 1
  87. datestr = ''.join(dastr)
  88. mm = int(datestr[:2])
  89. dd = int(datestr[2:4])
  90. yy = int(datestr[4:])
  91. tm = time.localtime()
  92. dy = int(tm.tm_year)
  93. dm = int(tm.tm_mon)
  94. dday=int(tm.tm_mday)
  95. if dy < yy:
  96. aa = 1.7
  97. elif dy == yy:
  98. if dm < mm:
  99. aa = 1.7
  100. elif dm == mm:
  101. if dday <= dd:
  102. aa = 1.7
  103. else:
  104. aa = -3
  105. else:
  106. aa = -3
  107. else:
  108. aa = -3
  109. return aa
  110. def macget(sy0,ma0):
  111. mackey = {
  112. 'a':'10','b':'11','c':'12','d':'13','e':'14','f':'15'
  113. }
  114. aa = 0
  115. sy = list(sy0)
  116. ma = list(ma0)
  117. ii = 0
  118. node = uuid.getnode()
  119. MAC = uuid.UUID(int = node).hex[-12:]
  120. mac = MAC.lower()
  121. while ii < 4:
  122. try:
  123. syi = int(sy[ii])
  124. mai = ma[ii]
  125. maci = mac[syi]
  126. if maci == mai:
  127. aa = 0
  128. else:
  129. aa = -2
  130. break
  131. except:
  132. syi = int(mackey[sy[ii]])
  133. mai = ma[ii]
  134. maci = mac[syi]
  135. if maci == mai:
  136. aa = 0
  137. else:
  138. aa = -2
  139. break
  140. ii = ii + 1
  141. if aa == 0:
  142. aa = 1.7
  143. else:
  144. aa = -2
  145. return aa
  146. def pdd(kk):
  147. n = Symbol('n')
  148. if kk > limit(((3*n*n - 5)/(2*n*n + n)),n,oo) and kk < (integrate(n**2,[n,0,1]) * 6):
  149. # ----------------这句才是程序运行代码----------------------------------
  150. return 0
  151. # ----------------这句才是程序运行代码----------------------------------
  152. elif kk == -1:
  153. aa = encryption.macget()
  154. bb = encryption.plusmac(aa)
  155. cc = encryption.ordermac(bb)
  156. # print('密钥:' + cc)
  157. dialog = CustomDialog()
  158. dialog.label_text('许可文件不存在!复制密钥交给管理员获取许可\n密钥:' + cc)
  159. dialog.exec_()
  160. return kk
  161. # arcpy.AddMessage ('The License File Does Not Exist!')
  162. elif kk == -2:
  163. # arcpy.AddMessage ('License Password Error!')
  164. return kk
  165. elif kk == -3:
  166. # arcpy.AddMessage('Out Of Permitted Time!')
  167. return kk
  168. else:
  169. # arcpy.AddMessage ('License File Corrupted!')
  170. return kk
  171. def bsfGet(s0):
  172. aa = 0
  173. bsfkey = {
  174. 'bd':'a','cd':'c','7a':'d','ae':'e','97':'g','6k':'h',
  175. '57':'i','22':'D','2c':'m','8m':'o','3w':'p','11':'I',
  176. '5d':'r','4u':'s','9t':'t','au':'u','23':'x','yk':'y',
  177. 'za':'z','k4':'0','v6':'1','tw':'2','h0':'5','op':'7',
  178. 'po':'8','wt':'9','8x':':','yy':',','lo':' ',
  179. }
  180. ii = 0
  181. lens0 = len(s0)
  182. bsfstr = []
  183. while ii < (lens0 / 2):
  184. x1 = 2 * ii
  185. y1 = x1 + 2
  186. bsf = bsfkey[s0[x1:y1]]
  187. bsfstr.append(bsf)
  188. ii = ii + 1
  189. bsstr = ''.join(bsfstr)
  190. return bsstr
  191. def licen():
  192. aa = 0
  193. linsy = ''
  194. linma = ''
  195. s1 = 'aucd8x'
  196. s2 = '3w5d8m7aaucd9tlo11228x'
  197. s3 = 'cd573w6kae5d8x'
  198. # path = 'D:\\CMCtbxLisence\\tbxLisence.txt'
  199. # isExists=os.path.exists(path)
  200. # if not isExists:
  201. # path1 = 'E:\\CMCtbxLisence\\tbxLisence.txt'
  202. # isExists1=os.path.exists(path1)
  203. # if not isExists1:
  204. # path2 = 'F:\\CMCtbxLisence\\tbxLisence.txt'
  205. # isExists2=os.path.exists(path2)
  206. # if not isExists2:
  207. # aa = -1
  208. # else:
  209. # outpath = 'F:\\CMCtbxLisence\\tbxLisence.txt'
  210. # else:
  211. # outpath = 'E:\\CMCtbxLisence\\tbxLisence.txt'
  212. # else:
  213. # outpath = 'D:\\CMCtbxLisence\\tbxLisence.txt'
  214. outpath = os.path.expandvars("%APPDATA%") + '\\CMCexeLisence\\compaLisence.txt'
  215. isExists = os.path.exists(outpath)
  216. if not isExists:
  217. aa = -1
  218. if aa != -1:
  219. with open(outpath, 'r') as lines:
  220. for line in lines:
  221. t = bsfGet(s1)
  222. if re.search(bsfGet(s1),line):
  223. try:
  224. linrq1 = line.split(':',-1)[-1]
  225. linrq = linrq1.replace('\n','')
  226. aa = dataGet(linrq)
  227. if aa == -3:
  228. break
  229. except:
  230. aa = -9999
  231. if re.search(bsfGet(s2),line):
  232. try:
  233. linSY1 = line.split('-',-1)[-1]
  234. linSY = linSY1.replace('\n','')
  235. linsy = linSY.lower()
  236. except:
  237. aa = -9999
  238. if re.search(bsfGet(s3),line):
  239. try:
  240. linma1 = line.split('-',-1)[0]
  241. linMA1 = linma1.split(':',-1)[-1]
  242. linMA = linMA1.replace('\n','')
  243. linma = linMA.lower()
  244. aa = macget(linsy,linma)
  245. except:
  246. aa = -9999
  247. else:
  248. aa = -1
  249. return pdd(aa)
  250. class MainWindow(QMainWindow, Ui_MainWindow):
  251. def __init__(self):
  252. super(MainWindow, self).__init__()
  253. self.setupUi(self)
  254. self.retranslateUi(self)
  255. self.initUI()
  256. self.set_window() # 定制窗口
  257. self.miniButton.clicked.connect(self.window().showMinimized)
  258. self.setAttribute(QtCore.Qt.WA_TranslucentBackground)
  259. self.closeButton.clicked.connect(self.window().close)
  260. self.draggable = False
  261. self.offset = None
  262. def mousePressEvent(self, event):
  263. if event.button() == QtCore.Qt.LeftButton:
  264. self.draggable = True
  265. self.offset = event.pos()
  266. def mouseMoveEvent(self, event):
  267. if self.draggable:
  268. self.move(event.globalPos() - self.offset)
  269. def mouseReleaseEvent(self, event):
  270. if event.button() == QtCore.Qt.LeftButton:
  271. self.draggable = False
  272. def set_window(self):
  273. # 关闭系统标题栏
  274. self.setWindowFlag(QtCore.Qt.FramelessWindowHint)
  275. def initUI(self):
  276. self.setWindowTitle('文章对比查重')
  277. self.setFixedSize(self.width(), self.height())
  278. self.toolButton_5.clicked.connect(lambda: self.open_file_dialog(3))
  279. self.toolButton_4.clicked.connect(lambda: self.open_file_dialog(2))
  280. self.toolButton_3.clicked.connect(lambda: self.open_file_dialog(1))
  281. self.pushButton_2.clicked.connect(self.btn_ok)
  282. def open_file_dialog(self, flag):
  283. if flag == 1:
  284. file_name = QFileDialog.getExistingDirectory(self, '打开文件夹', 'C:/', QFileDialog.ShowDirsOnly)
  285. if file_name != '':
  286. self.lineEdit.setText(file_name)
  287. self.lineEdit.setFocus()
  288. if flag == 2:
  289. file_name = QFileDialog.getOpenFileName(self, '打开文件', '/path/to/dir', "Word文件 (*.doc *.docx)")
  290. if file_name != '':
  291. file_name, _ = file_name
  292. self.lineEdit_2.setText(file_name)
  293. self.lineEdit_2.setFocus()
  294. if flag == 3:
  295. file_name, _ = QFileDialog.getOpenFileName(self, '打开文件', '/path/to/dir', "Word文件 (*.doc *.docx)")
  296. if file_name:
  297. self.lineEdit.setText(os.path.dirname(file_name))
  298. self.lineEdit_3.setText(file_name)
  299. self.lineEdit_3.setFocus()
  300. def btn_ok(self):
  301. if self.lineEdit_3.text() == '':
  302. QMessageBox.warning(self, '消息', '待查文章文件不可为空!')
  303. return
  304. elif self.lineEdit_2.text() == '':
  305. QMessageBox.warning(self, '消息', '对比文章文件不可为空!')
  306. return
  307. elif self.lineEdit.text() == '':
  308. QMessageBox.warning(self, '消息', '结果输出文件夹不可为空!')
  309. return
  310. licencode = licen()
  311. if licencode == 0:
  312. try:
  313. inpath = self.lineEdit_3.text()
  314. dbpath = self.lineEdit_2.text()
  315. outpath = self.lineEdit.text()
  316. wordcompare(inpath, dbpath, outpath)
  317. except Exception as e:
  318. QMessageBox.critical(self, '错误', str(e))
  319. return
  320. QMessageBox.information(self, '消息', '对比完成')
  321. elif licencode == -2:
  322. # QMessageBox.warning(self, '警告', '许可密钥错误!')
  323. aa = encryption.macget()
  324. bb = encryption.plusmac(aa)
  325. cc = encryption.ordermac(bb)
  326. dialog = CustomDialog()
  327. dialog.label_text('许可密钥错误!复制密钥交给管理员获取许可\n密钥:' + cc)
  328. dialog.exec_()
  329. elif licencode == -3:
  330. # QMessageBox.warning(self, '警告', '超过许可时间!')
  331. aa = encryption.macget()
  332. bb = encryption.plusmac(aa)
  333. cc = encryption.ordermac(bb)
  334. dialog = CustomDialog()
  335. dialog.label_text('超过许可时间!复制密钥交给管理员获取许可\n密钥:' + cc)
  336. dialog.exec_()
  337. elif licencode != -1:
  338. # QMessageBox.warning(self, '警告', '许可文件损坏!')
  339. aa = encryption.macget()
  340. bb = encryption.plusmac(aa)
  341. cc = encryption.ordermac(bb)
  342. dialog = CustomDialog()
  343. dialog.label_text('许可文件损坏!复制密钥交给管理员获取许可\n密钥:' + cc)
  344. dialog.exec_()
  345. # 提示窗口,显示密钥
  346. class CustomDialog(QDialog):
  347. def __init__(self, parent=None):
  348. super().__init__(parent)
  349. # 设置对话框的标题
  350. self.setWindowTitle("提示")
  351. # 创建一个垂直布局
  352. layout = QVBoxLayout()
  353. # 创建一个QLabel来显示文本
  354. self.label = QLabel("这是一段可复制的文本内容。你可以尝试选择它并复制。")
  355. # 启用文本交互,允许用户选择文本
  356. self.label.setTextInteractionFlags(Qt.TextSelectableByMouse | Qt.TextSelectableByKeyboard)
  357. # 将QLabel添加到布局中
  358. layout.addWidget(self.label)
  359. # 创建一个按钮来关闭对话框
  360. self.close_button = QPushButton("关闭")
  361. # 将按钮的clicked信号连接到close槽函数来关闭对话框
  362. self.close_button.clicked.connect(self.close)
  363. # 将按钮添加到布局中
  364. layout.addWidget(self.close_button)
  365. # 设置对话框的布局
  366. self.setLayout(layout)
  367. def label_text(self, new_text):
  368. self.label.setText(new_text)
  369. def getText(wordname,word):
  370. doc = word.Documents.Open(FileName=wordname, Encoding='gbk')
  371. texts = []
  372. wd = ''
  373. for para in doc.paragraphs:
  374. txt = para.Range.Text.strip().rstrip(chr(13)+'\x07')
  375. txt = txt.rstrip(chr(13) + '\n')
  376. txt = txt.rstrip(chr(13))
  377. txt = txt.rstrip('\x01')
  378. texts.append(txt)
  379. wd += txt
  380. doc.Close()
  381. return texts, wd
  382. def is_Chinese(word):
  383. for ch in word:
  384. if '\u4e00' <= ch <= '\u9fff':
  385. return True
  386. return False
  387. def msplit(s, seperators='\?|。|?|!'):
  388. return re.split(seperators, s)
  389. def readDocx(docfile,word):
  390. print('*' * 80)
  391. print('文件', docfile, '加载中……')
  392. t1 = datetime.datetime.now()
  393. paras, wd = getText(docfile,word)
  394. segs = []
  395. for p in paras:
  396. temp = []
  397. for s in msplit(p):
  398. temp.append(s.replace(' ', ""))
  399. # temp += s.replace(' ', "")
  400. if len(temp) > 0:
  401. segs.append(temp)
  402. t2 = datetime.datetime.now()
  403. print('加载完成,用时: ', t2 - t1)
  404. showInfo(segs, docfile)
  405. return segs, wd
  406. chars = 0
  407. def showInfo(doc, filename='filename'):
  408. global chars
  409. segs = 0
  410. for p in doc:
  411. for s in p:
  412. segs = segs + 1
  413. chars = chars + len(s)
  414. print('段落数: {0:>8d} 个。'.format(len(doc)))
  415. print('短句数: {0:>8d} 句。'.format(segs))
  416. print('字符数: {0:>8d} 个。'.format(chars))
  417. def jaccard_similarity(s1, s2):
  418. def add_space(s):
  419. return ' '.join(list(s))
  420. # 将字中间加入空格
  421. s1, s2 = add_space(s1), add_space(s2)
  422. # 转化为TF矩阵
  423. cv = CountVectorizer(tokenizer=lambda s: s.split())
  424. corpus = [s1, s2]
  425. # fit_transform() 方法可以将字符串转化为词频矩阵
  426. vectors = cv.fit_transform(corpus).toarray()
  427. # 求交集
  428. numerator = np.sum(np.min(vectors, axis=0))
  429. # 求并集
  430. denominator = np.sum(np.max(vectors, axis=0))
  431. # 计算杰卡德系数
  432. return 1.0 * numerator / denominator
  433. def compareParagraph(doc1, i, doc2, j):
  434. p1 = doc1[i]
  435. p2 = doc2[j]
  436. flag = False
  437. repetion = [flag]
  438. cosine_sim = 0
  439. similarity = 0
  440. for s1 in p1:
  441. if len(s1) == 0 or s1.__contains__('\r'):
  442. continue
  443. for s2 in p2:
  444. if len(s2) == 0 or s2.__contains__('\r'):
  445. continue
  446. if len(s1) > 10:
  447. # # 使用TF-IDF向量化文本
  448. # vectorizer = TfidfVectorizer().fit_transform([s1, s2]).toarray()
  449. # # 计算余弦相似度
  450. # cosine_sim = cosine_similarity(vectorizer[0:1], vectorizer[1:2])[0][0]
  451. cosine_sim = jaccard_similarity(s1, s2)
  452. if cosine_sim > 0.5:
  453. comment = '重复内容:{0}\n重复度:{1:.2f}%'.format(s2, cosine_sim * 100)
  454. repetion[0] = True
  455. temp = [flag, i, p1.index(s1), comment]
  456. repetion.append(temp)
  457. elif len(s1) >= 2 and len(s1) <= 10:
  458. similarity = jaccard_similarity(s1, s2)
  459. if similarity > 0.5:
  460. comment = '重复内容:{0}\n重复度:{1:.2f}%'.format(s2, similarity * 100)
  461. repetion[0] = True
  462. temp = [flag, i, p1.index(s1), comment]
  463. repetion.append(temp)
  464. return repetion
  465. # if repetion[0] == False:
  466. # return repetion
  467. repetion_char = 0
  468. mark_count = 0
  469. def mark(docdb, marklist):
  470. global repetion_char
  471. global mark_count
  472. for i in range(len(marklist)):
  473. para_index = marklist[i][1]
  474. sentence_index = marklist[i][2]
  475. comment = marklist[i][3]
  476. para = docdb.paragraphs[para_index]
  477. if para.Range.Sentences.Count == 1:
  478. txt = para.Range.Text
  479. else:
  480. txt = para.Range.Sentences[sentence_index].Text
  481. txt = txt.strip().rstrip(chr(13) + '\n')
  482. txt = txt.rstrip(chr(13) + '\x07')
  483. txt = txt.rstrip(chr(13))
  484. txt = txt.rstrip('\x01')
  485. repetion_char += len(txt)
  486. if para.Range.Sentences.Count == 1:
  487. docdb.Comments.Add(
  488. Range=docdb.Range(Start=docdb.paragraphs[para_index].Range.Characters[0].Start,
  489. End=docdb.paragraphs[para_index].Range.Characters[len(txt) - 1].End),
  490. Text=comment)
  491. mark_count = mark_count + 1
  492. print('已标记'+str(mark_count)+'处')
  493. else:
  494. docdb.Comments.Add(
  495. Range=docdb.Range(Start=docdb.paragraphs[para_index].Range.Sentences[sentence_index].Characters[0].Start,
  496. End=docdb.paragraphs[para_index].Range.Sentences[sentence_index].Characters[
  497. len(txt) - 1].End),
  498. Text=comment)
  499. mark_count = mark_count + 1
  500. print('已标记' + str(mark_count) + '处')
  501. docdb.Save()
  502. def compare(doc1, doc2, start, end, result):
  503. # word = Dispatch('Word.Application')
  504. # word.Visible = False
  505. # path = os.path.join(dst_dir, '对比.docx')
  506. # docdb = word.Documents.Open(FileName=path, Encoding='gbk')
  507. for i in range(start, end):
  508. for j in range(len(doc2)):
  509. rp = compareParagraph(doc1, i, doc2, j)
  510. if rp[0] == True:
  511. print(str(i))
  512. with Lock():
  513. # 使用锁来同步写入结果,防止多进程写入冲突
  514. for i in range(1, len(rp)):
  515. result.append(rp[i])
  516. break
  517. def wordcompare(src_file, db_file, dst_dir):
  518. shutil.copy(src_file, os.path.join(dst_dir, '对比结果' + os.path.splitext(src_file)[1]))
  519. word = Dispatch('Word.Application')
  520. word.Visible = 0
  521. doc1, wd1 = readDocx(src_file, word)
  522. charcount = chars
  523. doc2, wd2 = readDocx(db_file, word)
  524. word.Quit()
  525. # # 使用TF-IDF向量化文本
  526. # vectorizer = TfidfVectorizer().fit_transform([wd1, wd2]).toarray()
  527. # # 计算余弦相似度
  528. # cosine = cosine_similarity(vectorizer[0:1], vectorizer[1:2])[0][0]
  529. print('开始对比...'.center(80, '*'))
  530. t1 = datetime.datetime.now()
  531. index1 = [0, len(doc1) // 2]
  532. index2 = [len(doc1) // 2, len(doc1)]
  533. manager = Manager()
  534. result = manager.list()
  535. processes = []
  536. p1 = Process(target=compare, args=(doc1, doc2, index1[0], index1[1], result))
  537. p2 = Process(target=compare, args=(doc1, doc2, index2[0], index2[1], result))
  538. processes.append(p1)
  539. processes.append(p2)
  540. # 启动进程
  541. for p in processes:
  542. p.start()
  543. # 等待所有进程完成
  544. for p in processes:
  545. p.join()
  546. print('共有'+str(len(result))+'处相似')
  547. word1 = Dispatch('Word.Application')
  548. word1.Visible = 0
  549. path = os.path.join(dst_dir, '对比结果' + os.path.splitext(src_file)[1])
  550. docdb = word1.Documents.Open(FileName=path, Encoding='gbk')
  551. mark(docdb, result)
  552. cosine = repetion_char/charcount
  553. myRange = docdb.Range(0, 0)
  554. myRange.InsertBefore('整体相似度:' + '%.2f' % (cosine * 100) + '%\n')
  555. docdb.Save()
  556. docdb.Close()
  557. word1.Quit()
  558. t2 = datetime.datetime.now()
  559. print('\n对比完成,总用时: ', t2 - t1)
  560. if __name__ == '__main__':
  561. multiprocessing.freeze_support()
  562. app = QApplication(sys.argv)
  563. myshow = MainWindow()
  564. myshow.show()
  565. sys.exit(app.exec_())
  566. # wordcompare('F://2024//07//文章对比查重//6月20日-甘孜抽水蓄能电站预可阶段测绘技术服务(中水、永鸿、田册)招标//甘孜抽水蓄能电站预可阶段'
  567. # '测绘技术服务技术文件2024.6.18-测绘公司.docx',
  568. # 'F://2024//07//文章对比查重//6月20日-甘孜抽水蓄能电站预可阶段测绘技术服务(中水、永鸿、田册)招标//甘孜抽水蓄能电站预可阶段测绘技术服务-田册.docx',
  569. # 'F://2024//07//文章对比查重//')