"""Document duplicate checker with a PyQt5 front end.

The script compares a Word document against a reference Word document
sentence by sentence (character-level multiset Jaccard similarity), adds a
Word comment on every sentence judged to be a duplicate and writes the
overall similarity at the top of a copy of the checked document.  A small
license check (expiry date + MAC address) gates the comparison.
"""
import datetime
import shutil
import os
import time
# NOTE: the star imports must stay *before* ``import re`` below, because
# sympy exports its own ``re`` (real part) which would shadow the module.
from sympy import *
import uuid
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
from win32com.client import Dispatch
from multiprocessing import Process, Lock, Manager
import multiprocessing
from pyDes import *
from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog, QMessageBox, QDialog, QVBoxLayout, QLabel, QPushButton
from PyQt5 import QtCore
from PyQt5.QtCore import Qt
from MainForm import Ui_MainWindow
import encryption
import re
import sys
from collections import Counter


def dataGet(s0):
    """Decode the obfuscated expiry date and compare it with today.

    ``s0`` holds eight two-letter codes (16 characters) that decode to the
    digits ``MMDDYYYY``.

    Returns:
        1.7 when today is on or before the expiry date, -3 when it is past.

    Raises:
        KeyError: when a two-letter code is unknown (including ``s0`` being
            too short).
    """
    datekey = {
        'ze': '0', 'im': '1', 'tw': '2', 'ee': '3', 'fr': '4', 've': '5',
        'ix': '6', 'se': '7', 'ei': '8', 'ni': '9'
    }
    datestr = ''.join(datekey[s0[pos:pos + 2]] for pos in range(0, 16, 2))
    mm = int(datestr[:2])
    dd = int(datestr[2:4])
    yy = int(datestr[4:])
    tm = time.localtime()
    # Lexicographic (year, month, day) comparison replaces the nested ladder.
    if (int(tm.tm_year), int(tm.tm_mon), int(tm.tm_mday)) <= (yy, mm, dd):
        return 1.7
    return -3


def macget(sy0, ma0):
    """Check four characters of this machine's MAC address.

    Each of the first four characters of ``sy0`` is a hexadecimal digit
    giving a position in the 12-character lower-case MAC string; the MAC
    character at that position must equal the matching character of ``ma0``.

    Returns:
        1.7 when all four positions match, -2 on the first mismatch.

    Raises:
        IndexError / ValueError: when the inputs are too short, a position
            is outside the MAC string, or a character is not a hex digit.
    """
    mac = uuid.UUID(int=uuid.getnode()).hex[-12:].lower()
    for ii in range(4):
        # int(..., 16) covers both the plain digits and the a-f positions
        # that were previously translated through a lookup table.
        position = int(sy0[ii], 16)
        if mac[position] != ma0[ii]:
            return -2
    return 1.7


def _show_key_dialog(message):
    """Show ``message`` followed by this machine's key in a copyable dialog."""
    aa = encryption.macget()
    bb = encryption.plusmac(aa)
    cc = encryption.ordermac(bb)
    dialog = CustomDialog()
    dialog.label_text(message + cc)
    dialog.exec_()


def pdd(kk):
    """Translate a raw license status into the code used by the GUI.

    Returns 0 when ``kk`` lies strictly between the two symbolic bounds
    below (3/2 and 2, so the 1.7 "pass" value qualifies); every other value
    is returned unchanged.  For -1 (license file missing) the key dialog is
    shown before returning.
    """
    n = Symbol('n')
    # This condition is what actually lets the program run: the limit
    # evaluates to 3/2 and the integral times six to 2.
    if kk > limit(((3*n*n - 5)/(2*n*n + n)), n, oo) and kk < (integrate(n**2, [n, 0, 1]) * 6):
        return 0
    if kk == -1:
        # License file does not exist.
        _show_key_dialog('许可文件不存在!复制密钥交给管理员获取许可\n密钥:')
    # -2: license password error, -3: out of permitted time,
    # anything else: license file corrupted.
    return kk


def bsfGet(s0):
    """Decode a string of two-character codes into plain text.

    Raises:
        KeyError: when a code is unknown (including a trailing single
            character when ``s0`` has odd length).
    """
    bsfkey = {
        'bd': 'a', 'cd': 'c', '7a': 'd', 'ae': 'e', '97': 'g', '6k': 'h',
        '57': 'i', '22': 'D', '2c': 'm', '8m': 'o', '3w': 'p', '11': 'I',
        '5d': 'r', '4u': 's', '9t': 't', 'au': 'u', '23': 'x', 'yk': 'y',
        'za': 'z', 'k4': '0', 'v6': '1', 'tw': '2', 'h0': '5', 'op': '7',
        'po': '8', 'wt': '9', '8x': ':', 'yy': ',', 'lo': ' ',
    }
    return ''.join(bsfkey[s0[pos:pos + 2]] for pos in range(0, len(s0), 2))


def licen():
    """Validate the license file and return the GUI status code.

    The file ``%APPDATA%\\CMCexeLisence\\compaLisence.txt`` is scanned line
    by line for three tagged entries (the tags are stored obfuscated and
    decoded with ``bsfGet``): the expiry date, the MAC positions and the MAC
    characters.

    Returns (through ``pdd``):
        0 when the license is valid, -1 when the file is missing, -2 on a
        MAC mismatch, -3 when expired, -9999 when the file is unreadable or
        contains no checkable entry.
    """
    s1 = 'aucd8x'
    s2 = '3w5d8m7aaucd9tlo11228x'
    s3 = 'cd573w6kae5d8x'

    outpath = os.path.expandvars("%APPDATA%") + '\\CMCexeLisence\\compaLisence.txt'
    if not os.path.exists(outpath):
        return pdd(-1)

    # The tags contain no regex metacharacters, so substring tests are
    # equivalent to the former re.search calls.
    date_tag = bsfGet(s1)
    sy_tag = bsfGet(s2)
    ma_tag = bsfGet(s3)

    codes = []      # outcome of every check that ran, in file order
    expired = False
    linsy = ''
    with open(outpath, 'r') as lines:
        for line in lines:
            if date_tag in line:
                try:
                    linrq = line.split(':', -1)[-1].replace('\n', '')
                    code = dataGet(linrq)
                except Exception:
                    code = -9999
                codes.append(code)
                if code == -3:
                    expired = True
                    break
            if sy_tag in line:
                try:
                    linsy = line.split('-', -1)[-1].replace('\n', '').lower()
                except Exception:
                    codes.append(-9999)
            if ma_tag in line:
                try:
                    linma = line.split('-', -1)[0].split(':', -1)[-1]
                    linma = linma.replace('\n', '').lower()
                    code = macget(linsy, linma)
                except Exception:
                    code = -9999
                codes.append(code)

    failures = [code for code in codes if code < 0]
    if expired:
        aa = -3
    elif failures:
        # A failed check is never overridden by a later passing one, so the
        # result no longer depends on the order of the lines in the file.
        aa = failures[0]
    elif codes:
        aa = 1.7
    else:
        # No recognised entry at all: previously the initial status 0 leaked
        # through pdd() and was treated as a valid license.
        aa = -9999
    return pdd(aa)


class MainWindow(QMainWindow, Ui_MainWindow):
    """Frameless, draggable main window of the duplicate checker."""

    def __init__(self):
        super(MainWindow, self).__init__()
        self.setupUi(self)
        self.retranslateUi(self)
        self.initUI()
        self.set_window()
        # Custom title-bar buttons and translucent background.
        self.miniButton.clicked.connect(self.window().showMinimized)
        self.setAttribute(QtCore.Qt.WA_TranslucentBackground)
        self.closeButton.clicked.connect(self.window().close)
        # State used to drag the frameless window with the mouse.
        self.draggable = False
        self.offset = None

    def mousePressEvent(self, event):
        """Start dragging on a left-button press."""
        if event.button() == QtCore.Qt.LeftButton:
            self.draggable = True
            self.offset = event.pos()

    def mouseMoveEvent(self, event):
        """Move the window while a drag is in progress."""
        if self.draggable:
            self.move(event.globalPos() - self.offset)

    def mouseReleaseEvent(self, event):
        """Stop dragging when the left button is released."""
        if event.button() == QtCore.Qt.LeftButton:
            self.draggable = False

    def set_window(self):
        """Hide the system title bar."""
        self.setWindowFlag(QtCore.Qt.FramelessWindowHint)

    def initUI(self):
        """Set the title, fix the window size and wire up the buttons."""
        self.setWindowTitle('文章对比查重')
        self.setFixedSize(self.width(), self.height())
        self.toolButton_5.clicked.connect(lambda: self.open_file_dialog(3))
        self.toolButton_4.clicked.connect(lambda: self.open_file_dialog(2))
        self.toolButton_3.clicked.connect(lambda: self.open_file_dialog(1))
        self.pushButton_2.clicked.connect(self.btn_ok)

    def open_file_dialog(self, flag):
        """Open a chooser and store the selection in the matching line edit.

        ``flag`` 1 picks the output folder (``lineEdit``), 2 the reference
        document (``lineEdit_2``) and 3 the document to check
        (``lineEdit_3``, which also presets the output folder to the
        document's directory).  Cancelling a chooser leaves the fields
        untouched.
        """
        if flag == 1:
            file_name = QFileDialog.getExistingDirectory(self, '打开文件夹', 'C:/', QFileDialog.ShowDirsOnly)
            if file_name != '':
                self.lineEdit.setText(file_name)
                self.lineEdit.setFocus()
        if flag == 2:
            # getOpenFileName returns (path, filter); test the path itself so
            # that cancelling no longer wipes the previous selection.
            file_name, _ = QFileDialog.getOpenFileName(self, '打开文件', '/path/to/dir', "Word文件 (*.doc *.docx)")
            if file_name:
                self.lineEdit_2.setText(file_name)
                self.lineEdit_2.setFocus()
        if flag == 3:
            file_name, _ = QFileDialog.getOpenFileName(self, '打开文件', '/path/to/dir', "Word文件 (*.doc *.docx)")
            if file_name:
                self.lineEdit.setText(os.path.dirname(file_name))
                self.lineEdit_3.setText(file_name)
                self.lineEdit_3.setFocus()

    def btn_ok(self):
        """Validate the inputs, check the license and run the comparison."""
        if self.lineEdit_3.text() == '':
            QMessageBox.warning(self, '消息', '待查文章文件不可为空!')
            return
        elif self.lineEdit_2.text() == '':
            QMessageBox.warning(self, '消息', '对比文章文件不可为空!')
            return
        elif self.lineEdit.text() == '':
            QMessageBox.warning(self, '消息', '结果输出文件夹不可为空!')
            return

        licencode = licen()
        if licencode == 0:
            try:
                inpath = self.lineEdit_3.text()
                dbpath = self.lineEdit_2.text()
                outpath = self.lineEdit.text()
                wordcompare(inpath, dbpath, outpath)
            except Exception as e:
                QMessageBox.critical(self, '错误', str(e))
                return
            QMessageBox.information(self, '消息', '对比完成')
        elif licencode == -2:
            # Wrong license key.
            _show_key_dialog('许可密钥错误!复制密钥交给管理员获取许可\n密钥:')
        elif licencode == -3:
            # License expired.
            _show_key_dialog('超过许可时间!复制密钥交给管理员获取许可\n密钥:')
        elif licencode != -1:
            # License file corrupted (-1 already showed its dialog in pdd()).
            _show_key_dialog('许可文件损坏!复制密钥交给管理员获取许可\n密钥:')


class CustomDialog(QDialog):
    """Message dialog whose text can be selected and copied (shows the key)."""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("提示")
        layout = QVBoxLayout()
        # Label with text interaction enabled so the user can copy the key.
        self.label = QLabel("这是一段可复制的文本内容。你可以尝试选择它并复制。")
        self.label.setTextInteractionFlags(Qt.TextSelectableByMouse | Qt.TextSelectableByKeyboard)
        layout.addWidget(self.label)
        # Button that closes the dialog.
        self.close_button = QPushButton("关闭")
        self.close_button.clicked.connect(self.close)
        layout.addWidget(self.close_button)
        self.setLayout(layout)

    def label_text(self, new_text):
        """Replace the text shown in the dialog."""
        self.label.setText(new_text)


def getText(wordname, word):
    """Read all paragraphs of a Word document through COM.

    Args:
        wordname: path of the document to open.
        word: a running ``Word.Application`` COM object.

    Returns:
        ``(texts, wd)`` where ``texts`` is the list of cleaned paragraph
        strings and ``wd`` is their concatenation.
    """
    doc = word.Documents.Open(FileName=wordname, Encoding='gbk')
    texts = []
    try:
        for para in doc.paragraphs:
            # Strip whitespace and the trailing control characters found at
            # the end of the paragraph text.
            txt = para.Range.Text.strip().rstrip(chr(13) + '\x07')
            txt = txt.rstrip(chr(13) + '\n')
            txt = txt.rstrip(chr(13))
            txt = txt.rstrip('\x01')
            texts.append(txt)
    finally:
        # Always release the document, even when reading fails.
        doc.Close()
    return texts, ''.join(texts)


def is_Chinese(word):
    """Return True when ``word`` contains a character in U+4E00..U+9FFF."""
    return any('\u4e00' <= ch <= '\u9fff' for ch in word)


def msplit(s, seperators=r'[?？。!！]'):
    """Split ``s`` into sentences on the ``seperators`` regular expression.

    The default splits on ASCII and full-width question/exclamation marks
    and on the ideographic full stop.
    """
    return re.split(seperators, s)


def readDocx(docfile, word):
    """Load a document and split every paragraph into sentences.

    Returns:
        ``(segs, wd)``: ``segs[i]`` is the list of space-free sentences of
        paragraph ``i`` (one entry per paragraph, so indices line up with
        the Word paragraphs), ``wd`` is the whole text.
    """
    print('*' * 80)
    print('文件', docfile, '加载中……')
    t1 = datetime.datetime.now()
    paras, wd = getText(docfile, word)
    # re.split always yields at least one piece, so every paragraph
    # contributes exactly one entry.
    segs = [[s.replace(' ', "") for s in msplit(p)] for p in paras]
    t2 = datetime.datetime.now()
    print('加载完成,用时: ', t2 - t1)
    showInfo(segs, docfile)
    return segs, wd


# Running total of characters seen by showInfo(); wordcompare() resets it.
chars = 0


def showInfo(doc, filename='filename'):
    """Print paragraph/sentence/character counts and update ``chars``.

    ``chars`` is cumulative: it keeps growing across calls until it is
    reset, and ``wordcompare`` reads it right after loading the first
    document.  ``filename`` is accepted but not used.
    """
    global chars
    segs = 0
    for p in doc:
        segs += len(p)
        chars += sum(len(s) for s in p)
    print('段落数: {0:>8d} 个。'.format(len(doc)))
    print('短句数: {0:>8d} 句。'.format(segs))
    print('字符数: {0:>8d} 个。'.format(chars))


def jaccard_similarity(s1, s2):
    """Return the character-level multiset Jaccard coefficient of two strings.

    Characters are lower-cased and whitespace is ignored.  The result is the
    sum of the per-character minimum counts divided by the sum of the
    per-character maximum counts; 0.0 is returned when neither string
    contains a countable character.
    """
    def char_counts(s):
        # Same tokenisation as before: space-separate the characters,
        # lower-case, split on whitespace.
        return Counter(' '.join(s).lower().split())

    c1, c2 = char_counts(s1), char_counts(s2)
    denominator = sum((c1 | c2).values())   # multiset union
    if denominator == 0:
        return 0.0
    numerator = sum((c1 & c2).values())     # multiset intersection
    return 1.0 * numerator / denominator


def compareParagraph(doc1, i, doc2, j):
    """Compare paragraph ``i`` of ``doc1`` with paragraph ``j`` of ``doc2``.

    Sentences shorter than two characters or containing ``'\\r'`` are
    skipped (empty reference sentences as well).  A pair counts as a
    duplicate when its Jaccard similarity exceeds 0.5.

    Returns:
        A list whose first element is True when at least one duplicate was
        found, followed by one ``[False, i, sentence_index, comment]`` entry
        per duplicate pair.
    """
    p1 = doc1[i]
    p2 = doc2[j]
    repetion = [False]
    # enumerate() gives the real position even when a sentence occurs more
    # than once in the paragraph (list.index would return the first one).
    for sentence_index, s1 in enumerate(p1):
        if len(s1) < 2 or '\r' in s1:
            continue
        for s2 in p2:
            if len(s2) == 0 or '\r' in s2:
                continue
            similarity = jaccard_similarity(s1, s2)
            if similarity > 0.5:
                comment = '重复内容:{0}\n重复度:{1:.2f}%'.format(s2, similarity * 100)
                repetion[0] = True
                repetion.append([False, i, sentence_index, comment])
    return repetion


# Counters updated by mark(); wordcompare() resets them before each run.
repetion_char = 0
mark_count = 0


def mark(docdb, marklist):
    """Add a Word comment for every entry of ``marklist`` and save.

    Each entry is ``[flag, paragraph_index, sentence_index, comment]``.
    Updates the module counters ``repetion_char`` (characters marked) and
    ``mark_count`` (comments added).
    """
    global repetion_char
    global mark_count
    for _, para_index, sentence_index, comment in marklist:
        para = docdb.paragraphs[para_index]
        # A single-sentence paragraph is commented as a whole; otherwise
        # only the matching sentence is.
        if para.Range.Sentences.Count == 1:
            target = para.Range
        else:
            target = para.Range.Sentences[sentence_index]
        txt = target.Text
        txt = txt.strip().rstrip(chr(13) + '\n')
        txt = txt.rstrip(chr(13) + '\x07')
        txt = txt.rstrip(chr(13))
        txt = txt.rstrip('\x01')
        repetion_char += len(txt)
        docdb.Comments.Add(
            Range=docdb.Range(Start=target.Characters[0].Start,
                              End=target.Characters[len(txt) - 1].End),
            Text=comment)
        mark_count = mark_count + 1
        print('已标记' + str(mark_count) + '处')
    docdb.Save()


def compare(doc1, doc2, start, end, result):
    """Worker: compare paragraphs ``start..end-1`` of ``doc1`` with ``doc2``.

    For each paragraph the search stops at the first ``doc2`` paragraph that
    yields a duplicate; its entries are appended to the shared ``result``
    list.
    """
    for i in range(start, end):
        for j in range(len(doc2)):
            rp = compareParagraph(doc1, i, doc2, j)
            if rp[0]:
                print(str(i))
                # One extend() call on the shared list; the former
                # ``with Lock():`` built a new lock each time and therefore
                # synchronised nothing.
                result.extend(rp[1:])
                break


def wordcompare(src_file, db_file, dst_dir):
    """Compare ``src_file`` against ``db_file`` and write the marked copy.

    A copy of ``src_file`` named ``对比结果<ext>`` is created in ``dst_dir``;
    duplicate sentences are commented in it and the overall similarity
    (marked characters / characters of ``src_file``) is inserted at the top.
    """
    global chars, repetion_char, mark_count
    # Reset the module counters so repeated runs in one GUI session do not
    # accumulate the figures of earlier comparisons.
    chars = 0
    repetion_char = 0
    mark_count = 0

    result_path = os.path.join(dst_dir, '对比结果' + os.path.splitext(src_file)[1])
    shutil.copy(src_file, result_path)

    word = Dispatch('Word.Application')
    word.Visible = 0
    try:
        doc1, _wd1 = readDocx(src_file, word)
        charcount = chars   # characters of the checked document only
        doc2, _wd2 = readDocx(db_file, word)
    finally:
        word.Quit()

    print('开始对比...'.center(80, '*'))
    t1 = datetime.datetime.now()
    half = len(doc1) // 2
    # The context manager shuts the manager process down afterwards.
    with Manager() as manager:
        result = manager.list()
        processes = [
            Process(target=compare, args=(doc1, doc2, lo, hi, result))
            for lo, hi in ((0, half), (half, len(doc1)))
        ]
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        print('共有' + str(len(result)) + '处相似')
        marks = list(result)   # local copy, usable after the manager exits

    word1 = Dispatch('Word.Application')
    word1.Visible = 0
    try:
        docdb = word1.Documents.Open(FileName=result_path, Encoding='gbk')
        try:
            mark(docdb, marks)
            # Guard against an empty source document.
            cosine = repetion_char / charcount if charcount else 0.0
            myRange = docdb.Range(0, 0)
            myRange.InsertBefore('整体相似度:' + '%.2f' % (cosine * 100) + '%\n')
            docdb.Save()
        finally:
            # 0 = wdDoNotSaveChanges: the document was saved explicitly on
            # success, and on failure no save prompt must block the hidden
            # Word instance.
            docdb.Close(SaveChanges=0)
    finally:
        word1.Quit()
    t2 = datetime.datetime.now()
    print('\n对比完成,总用时: ', t2 - t1)


if __name__ == '__main__':
    multiprocessing.freeze_support()
    app = QApplication(sys.argv)
    myshow = MainWindow()
    myshow.show()
    sys.exit(app.exec_())