123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623 |
- import datetime, shutil, os
- import time
- from sympy import *
- import uuid
- from sklearn.feature_extraction.text import CountVectorizer
- import numpy as np
- from win32com.client import Dispatch
- from multiprocessing import Process, Lock, Manager
- import multiprocessing
- from pyDes import *
- from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog, QMessageBox, QDialog, QVBoxLayout,QLabel,QPushButton
- from PyQt5 import QtCore
- from PyQt5.QtCore import Qt
- from MainForm import Ui_MainWindow
- import encryption
- import re
-
- # def getMachineCode():
- # m_wmi = wmi.WMI()
- # cpu_info = m_wmi.Win32_Processor()
- # serial_number = 0
- # if len(cpu_info) > 0:
- # serial_number = cpu_info[0].ProcessorId
- # serial_number = serial_number.encode("utf-8")
- # machine_code = hashlib.md5(serial_number).hexdigest()
- # tm = time.localtime()
- # date = '%04d' % tm.tm_year + '%02d' % tm.tm_mon + '%02d' % tm.tm_mday
- # machine_code = machine_code + date
- # return machine_code
- #
- # def Encrypted(code):
- # # 使用DES-CBC加密算法加密机器码
- # Des_key = "fda34hfk" # 自定义 Key
- # Des_IV = "\x11\2\x2a\3\1\x27\2\0" # IV向量
- # k = des(Des_key, CBC, Des_IV, pad=None, padmode=PAD_PKCS5)
- # EncryptStr = k.encrypt(code)
- # return EncryptStr
- #
- # def Decrypted(code):
- # # 使用DES-CBC加密算法加密机器码
- # Des_key = "fda34hfk" # 自定义 Key
- # Des_IV = "\x11\2\x2a\3\1\x27\2\0" # IV向量
- # k = des(Des_key, CBC, Des_IV, pad=None, padmode=PAD_PKCS5)
- # DecryptStr = k.decrypt(code)
- # return DecryptStr
- #
- # def checkCode():
- # # 获取机器码
- # machine_code = getMachineCode()
- # # 自己定义 Encrypted 函数进行加密处理
- # encrypt_code = Encrypted(machine_code.encode("utf-8"))
- #
- # # 读取本地的授权文件
- # if os.path.exists("C:/Users/{0}/AppData/Roaming/cpregister.bin".format(getpass.getuser())):
- # with open("C:/Users/{0}/AppData/Roaming/cpregister.bin".format(getpass.getuser()), "r") as f:
- # key_code = f.read()
- # decrypt_code = Decrypted(key_code)
- # cpucode = decrypt_code
- # # 如果机器码经过加密后的值,等于授权码的值,则验证通过,否则验证失败
- # if key_code == encrypt_code:
- # return True
- # else:
- # return False
- # else:
- # print('机器码:' + str(machine_code))
- # registercode = input('注册码:')
- # if registercode == encrypt_code:
- # with open("C:/Users/{0}/AppData/Roaming/cpregister.bin".format(getpass.getuser()), 'wb') as file:
- # string_bytes = registercode.encode('utf-8')
- # file.write(string_bytes)
- # print('验证成功!')
- # else:
- # print('注册码有误!')
-
def dataGet(s0):
    """Decode an obfuscated expiry date and compare it with today's date.

    The first sixteen characters of ``s0`` are read as eight two-letter
    codes, each standing for one decimal digit; the eight digits are laid
    out as ``MMDDYYYY``. Anything after the sixteenth character is ignored.

    Returns 1.7 when the local date is on or before the decoded date and -3
    when it is later. An unknown or truncated code raises ``KeyError``.
    """
    datekey = {
        'ze': '0', 'im': '1', 'tw': '2', 'ee': '3', 'fr': '4', 've': '5',
        'ix': '6', 'se': '7', 'ei': '8', 'ni': '9',
    }
    digits = ''.join(datekey[s0[pos:pos + 2]] for pos in range(0, 16, 2))
    # Order the fields year-month-day so plain tuple comparison is
    # chronological.
    expiry = (int(digits[4:]), int(digits[:2]), int(digits[2:4]))
    now = time.localtime()
    today = (int(now.tm_year), int(now.tm_mon), int(now.tm_mday))
    return 1.7 if today <= expiry else -3
def macget(sy0, ma0):
    """Check four characters of this machine's MAC address.

    ``sy0`` supplies four positions into the 12-character lowercase hex MAC
    (``'0'``-``'9'`` directly, ``'a'``-``'f'`` for 10-15) and ``ma0`` the
    character expected at each of those positions.

    Returns 1.7 when all four characters match and -2 at the first
    mismatch. Malformed input (too short, unknown position letter, position
    beyond the MAC) raises ``IndexError`` or ``KeyError``.
    """
    mackey = {
        'a': '10', 'b': '11', 'c': '12', 'd': '13', 'e': '14', 'f': '15',
    }
    mac = uuid.UUID(int=uuid.getnode()).hex[-12:].lower()
    for ii in range(4):
        position_char = sy0[ii]
        # Only the digit parse is allowed to fail; any other error is a
        # malformed licence and must not be retried through the letter table.
        try:
            position = int(position_char)
        except ValueError:
            position = int(mackey[position_char])
        if mac[position] != ma0[ii]:
            return -2
    return 1.7
def pdd(kk):
    """Translate a raw licence status into the code handed back to the UI.

    Returns 0 (licensed) only when ``kk`` lies strictly between the two
    symbolic bounds below. For every other status the value is returned so
    the caller can pick a message: -1 licence file missing (a dialog with
    the request key is shown here), -2 wrong licence key, -3 licence
    expired, anything else means the licence file is corrupted.
    """
    n = Symbol('n')
    # The bounds are deliberately written as symbolic expressions: the limit
    # evaluates to 3/2 and six times the integral to 2, so only a status
    # strictly between 1.5 and 2 (the 1.7 produced by the checks) passes.
    if kk > limit(((3*n*n - 5)/(2*n*n + n)), n, oo) and kk < (integrate(n**2, [n, 0, 1]) * 6):
        return 0
    if kk == -1:
        aa = encryption.macget()
        bb = encryption.plusmac(aa)
        cc = encryption.ordermac(bb)
        dialog = CustomDialog()
        dialog.label_text('许可文件不存在!复制密钥交给管理员获取许可\n密钥:' + cc)
        dialog.exec_()
        return kk
    if kk == 0:
        # A raw status of 0 means no check produced a verdict. Returning it
        # unchanged would collide with the "licensed" return value above, so
        # report it as a corrupted licence instead.
        return -9999
    return kk
def bsfGet(s0):
    """Decode a string of two-character codes into plain text.

    ``s0`` is consumed two characters at a time and each pair is looked up
    in the substitution table. An unknown pair, or the single character left
    over from an odd-length input, raises ``KeyError``. An empty string
    decodes to an empty string.
    """
    bsfkey = {
        'bd': 'a', 'cd': 'c', '7a': 'd', 'ae': 'e', '97': 'g', '6k': 'h',
        '57': 'i', '22': 'D', '2c': 'm', '8m': 'o', '3w': 'p', '11': 'I',
        '5d': 'r', '4u': 's', '9t': 't', 'au': 'u', '23': 'x', 'yk': 'y',
        'za': 'z', 'k4': '0', 'v6': '1', 'tw': '2', 'h0': '5', 'op': '7',
        'po': '8', 'wt': '9', '8x': ':', 'yy': ',', 'lo': ' ',
    }
    return ''.join(bsfkey[s0[pos:pos + 2]] for pos in range(0, len(s0), 2))
def licen():
    """Validate the licence file under %APPDATA% and return pdd()'s verdict.

    The file is scanned line by line for three markers, stored obfuscated
    and decoded with bsfGet(): ``uc:`` (expiry date, checked by dataGet),
    ``product ID:`` (MAC positions) and ``cipher:`` (expected MAC
    characters, checked by macget against the positions read so far).

    Raw status passed to pdd():
      * -1     the licence file does not exist
      * -3     the licence has expired (scanning stops immediately)
      * -2     the MAC check failed
      * -9999  a line could not be parsed, the file could not be read, or
               no check was found in the file at all
      * 1.7    every check that ran passed
    """
    date_marker = bsfGet('aucd8x')
    serial_marker = bsfGet('3w5d8m7aaucd9tlo11228x')
    cipher_marker = bsfGet('cd573w6kae5d8x')

    outpath = os.path.expandvars("%APPDATA%") + '\\CMCexeLisence\\compaLisence.txt'
    if not os.path.exists(outpath):
        return pdd(-1)

    def merge(current, outcome):
        # A pass (positive) must never hide a failure recorded earlier;
        # a newer failure still replaces an older status.
        if current is not None and current < 0 and outcome > 0:
            return current
        return outcome

    status = None  # None until some check has produced a verdict
    linsy = ''
    try:
        with open(outpath, 'r') as lines:
            for line in lines:
                if re.search(date_marker, line):
                    try:
                        outcome = dataGet(line.split(':')[-1].replace('\n', ''))
                    except Exception:
                        outcome = -9999
                    status = merge(status, outcome)
                    if outcome == -3:
                        break
                if re.search(serial_marker, line):
                    linsy = line.split('-')[-1].replace('\n', '').lower()
                if re.search(cipher_marker, line):
                    linma = line.split('-')[0].split(':')[-1].replace('\n', '').lower()
                    try:
                        outcome = macget(linsy, linma)
                    except Exception:
                        outcome = -9999
                    status = merge(status, outcome)
    except (OSError, UnicodeDecodeError):
        # An unreadable licence file is treated like a corrupted one.
        status = -9999

    if status is None:
        # The file exists but contains no recognised check: never fall
        # through with a neutral status that could be read as success.
        status = -9999
    return pdd(status)
-
class MainWindow(QMainWindow, Ui_MainWindow):
    """Frameless main window: picks the input files and starts the comparison.

    The widgets (line edits, tool buttons, minimise/close buttons) come from
    the generated ``Ui_MainWindow``. Because the system title bar is hidden,
    the window is moved by dragging with the left mouse button.
    """

    def __init__(self):
        super(MainWindow, self).__init__()
        self.setupUi(self)
        self.retranslateUi(self)
        self.initUI()
        self.set_window()  # customise the window chrome
        self.miniButton.clicked.connect(self.window().showMinimized)
        self.setAttribute(QtCore.Qt.WA_TranslucentBackground)
        self.closeButton.clicked.connect(self.window().close)
        self.draggable = False
        self.offset = None

    def mousePressEvent(self, event):
        """Start a drag and remember where inside the window it began."""
        if event.button() == QtCore.Qt.LeftButton:
            self.draggable = True
            self.offset = event.pos()

    def mouseMoveEvent(self, event):
        """Move the window so the grab point stays under the cursor."""
        if self.draggable:
            self.move(event.globalPos() - self.offset)

    def mouseReleaseEvent(self, event):
        """End the drag when the left button is released."""
        if event.button() == QtCore.Qt.LeftButton:
            self.draggable = False

    def set_window(self):
        """Hide the system title bar."""
        self.setWindowFlag(QtCore.Qt.FramelessWindowHint)

    def initUI(self):
        """Set the title, fix the window size and wire up the buttons."""
        self.setWindowTitle('文章对比查重')
        self.setFixedSize(self.width(), self.height())

        self.toolButton_5.clicked.connect(lambda: self.open_file_dialog(3))
        self.toolButton_4.clicked.connect(lambda: self.open_file_dialog(2))
        self.toolButton_3.clicked.connect(lambda: self.open_file_dialog(1))
        self.pushButton_2.clicked.connect(self.btn_ok)

    def open_file_dialog(self, flag):
        """Open the picker selected by ``flag`` and store the chosen path.

        1 -- output folder (``lineEdit``)
        2 -- reference Word file to compare against (``lineEdit_2``)
        3 -- Word file to be checked (``lineEdit_3``); the output folder is
             pre-filled with that file's directory

        Cancelling a dialog leaves the current text untouched.
        """
        if flag == 1:
            folder = QFileDialog.getExistingDirectory(self, '打开文件夹', 'C:/', QFileDialog.ShowDirsOnly)
            if folder != '':
                self.lineEdit.setText(folder)
                self.lineEdit.setFocus()
        elif flag == 2:
            # getOpenFileName returns (path, selected_filter); only the path
            # tells whether the user cancelled.
            file_name, _ = QFileDialog.getOpenFileName(self, '打开文件', '/path/to/dir', "Word文件 (*.doc *.docx)")
            if file_name:
                self.lineEdit_2.setText(file_name)
                self.lineEdit_2.setFocus()
        elif flag == 3:
            file_name, _ = QFileDialog.getOpenFileName(self, '打开文件', '/path/to/dir', "Word文件 (*.doc *.docx)")
            if file_name:
                self.lineEdit.setText(os.path.dirname(file_name))
                self.lineEdit_3.setText(file_name)
                self.lineEdit_3.setFocus()

    def _show_key_dialog(self, message):
        """Show ``message`` followed by this machine's licence request key."""
        aa = encryption.macget()
        bb = encryption.plusmac(aa)
        cc = encryption.ordermac(bb)
        dialog = CustomDialog()
        dialog.label_text(message + cc)
        dialog.exec_()

    def btn_ok(self):
        """Validate the inputs, check the licence and run the comparison."""
        if self.lineEdit_3.text() == '':
            QMessageBox.warning(self, '消息', '待查文章文件不可为空!')
            return
        elif self.lineEdit_2.text() == '':
            QMessageBox.warning(self, '消息', '对比文章文件不可为空!')
            return
        elif self.lineEdit.text() == '':
            QMessageBox.warning(self, '消息', '结果输出文件夹不可为空!')
            return

        licencode = licen()
        if licencode == 0:
            try:
                inpath = self.lineEdit_3.text()
                dbpath = self.lineEdit_2.text()
                outpath = self.lineEdit.text()
                wordcompare(inpath, dbpath, outpath)
            except Exception as e:
                QMessageBox.critical(self, '错误', str(e))
                return
            QMessageBox.information(self, '消息', '对比完成')
        elif licencode == -2:
            self._show_key_dialog('许可密钥错误!复制密钥交给管理员获取许可\n密钥:')
        elif licencode == -3:
            self._show_key_dialog('超过许可时间!复制密钥交给管理员获取许可\n密钥:')
        elif licencode != -1:
            # -1 (licence file missing) has already been reported by pdd().
            self._show_key_dialog('许可文件损坏!复制密钥交给管理员获取许可\n密钥:')
-
- # 提示窗口,显示密钥
class CustomDialog(QDialog):
    """Small prompt dialog with a selectable text label and a close button.

    The label text can be selected with mouse or keyboard so the user can
    copy it (it is used to display the licence request key).
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("提示")

        # Label whose text the user may select and copy.
        self.label = QLabel("这是一段可复制的文本内容。你可以尝试选择它并复制。")
        self.label.setTextInteractionFlags(Qt.TextSelectableByMouse | Qt.TextSelectableByKeyboard)

        # Button that closes the dialog.
        self.close_button = QPushButton("关闭")
        self.close_button.clicked.connect(self.close)

        # Stack the label above the button.
        layout = QVBoxLayout()
        for widget in (self.label, self.close_button):
            layout.addWidget(widget)
        self.setLayout(layout)

    def label_text(self, new_text):
        """Replace the text shown in the label."""
        self.label.setText(new_text)
-
-
def getText(wordname, word):
    """Read every paragraph of a Word document through COM automation.

    ``word`` is a Word application object and ``wordname`` the path of the
    document to open. Returns ``(texts, wd)``: ``texts`` is the list of
    cleaned paragraph strings (one entry per paragraph, empty ones
    included) and ``wd`` is all of them concatenated.

    The document is closed even when reading a paragraph fails.
    """
    doc = word.Documents.Open(FileName=wordname, Encoding='gbk')
    try:
        texts = []
        for para in doc.paragraphs:
            # Strip surrounding whitespace, then the trailing control
            # characters: CR, BEL (\x07), LF and \x01. The order of the
            # rstrip calls is kept as it was.
            txt = para.Range.Text.strip().rstrip(chr(13) + '\x07')
            txt = txt.rstrip(chr(13) + '\n')
            txt = txt.rstrip(chr(13))
            txt = txt.rstrip('\x01')
            texts.append(txt)
    finally:
        doc.Close()
    return texts, ''.join(texts)
-
-
def is_Chinese(word):
    """Return True if ``word`` has at least one character in U+4E00..U+9FFF."""
    return any('\u4e00' <= ch <= '\u9fff' for ch in word)
-
-
def msplit(s, seperators='[?\uff1f\u3002!\uff01]'):
    """Split ``s`` into sentences at sentence-ending punctuation.

    ``seperators`` is a regular expression. The default is a character
    class matching the ASCII and full-width question mark, the ideographic
    full stop and the ASCII and full-width exclamation mark. A character
    class is used because a bare ``?`` alternative is not a valid pattern
    ("nothing to repeat").

    Returns the list produced by ``re.split`` (always at least one item;
    empty strings appear where separators are adjacent or at the ends).
    """
    return re.split(seperators, s)
-
-
def readDocx(docfile, word):
    """Load a Word document and break each paragraph into sentences.

    Returns ``(segs, wd)``: ``segs`` holds one list of sentences per
    paragraph, with spaces removed from every sentence, and ``wd`` is the
    concatenated document text as returned by getText(). Progress and
    statistics are printed to stdout.
    """
    print('*' * 80)
    print('文件', docfile, '加载中……')
    started = datetime.datetime.now()
    paras, wd = getText(docfile, word)
    segs = []
    for para in paras:
        pieces = [piece.replace(' ', "") for piece in msplit(para)]
        if pieces:
            segs.append(pieces)
    finished = datetime.datetime.now()
    print('加载完成,用时: ', finished - started)
    showInfo(segs, docfile)
    return segs, wd
-
chars = 0
def showInfo(doc, filename='filename'):
    """Print paragraph, sentence and character counts for one document.

    ``doc`` is a list of paragraphs, each a list of sentence strings.
    ``filename`` is accepted for compatibility but not used.

    The module-level ``chars`` is set to the character count of *this*
    document rather than accumulated, so the value read right after loading
    a document stays correct when several documents are loaded or the
    comparison is run more than once in the same process.
    """
    global chars
    segs = sum(len(p) for p in doc)
    chars = sum(len(s) for p in doc for s in p)
    print('段落数: {0:>8d} 个。'.format(len(doc)))
    print('短句数: {0:>8d} 句。'.format(segs))
    print('字符数: {0:>8d} 个。'.format(chars))
-
-
def jaccard_similarity(s1, s2):
    """Return the character-level (multiset) Jaccard similarity of two strings.

    Each string is reduced to a bag of lower-cased, non-whitespace
    characters. The result is the sum of the per-character minimum counts
    divided by the sum of the per-character maximum counts, a float in
    ``[0.0, 1.0]``. When neither string contains a countable character the
    similarity is 0.0.

    This is the same quantity as taking min/max over a two-row term-count
    matrix, computed directly with ``collections.Counter`` so that no
    vectorizer has to be built for every sentence pair.
    """
    from collections import Counter

    def char_counts(s):
        return Counter(ch.lower() for ch in s if not ch.isspace())

    counts1 = char_counts(s1)
    counts2 = char_counts(s2)
    # Counter `|` keeps the maximum count per key, `&` the minimum.
    denominator = sum((counts1 | counts2).values())
    if denominator == 0:
        return 0.0
    numerator = sum((counts1 & counts2).values())
    return 1.0 * numerator / denominator
-
-
def compareParagraph(doc1, i, doc2, j):
    """Compare every sentence of paragraph ``doc1[i]`` with ``doc2[j]``.

    Sentences of the checked paragraph shorter than two characters or
    containing a carriage return are skipped, as are empty reference
    sentences or ones containing a carriage return. A pair counts as a
    repetition when its Jaccard similarity exceeds 0.5.

    Returns a list whose first element is True when at least one repetition
    was found, followed by one ``[False, i, sentence_index, comment]`` entry
    per repetition. ``sentence_index`` is the position of the sentence
    inside ``doc1[i]`` (its real position, also when the same sentence text
    occurs more than once in the paragraph).
    """
    p1 = doc1[i]
    p2 = doc2[j]
    repetion = [False]
    for sentence_index, s1 in enumerate(p1):
        if len(s1) < 2 or '\r' in s1:
            continue
        for s2 in p2:
            if len(s2) == 0 or '\r' in s2:
                continue
            similarity = jaccard_similarity(s1, s2)
            if similarity > 0.5:
                comment = '重复内容:{0}\n重复度:{1:.2f}%'.format(s2, similarity * 100)
                repetion[0] = True
                repetion.append([False, i, sentence_index, comment])
    return repetion
-
-
repetion_char = 0
mark_count = 0
def mark(docdb, marklist):
    """Attach a Word comment to every repeated sentence and save the document.

    ``docdb`` is an open Word document (COM object). Each entry of
    ``marklist`` is ``[flag, paragraph_index, sentence_index, comment]``.
    When Word reports a single sentence for the paragraph the comment spans
    the whole paragraph, otherwise the indexed sentence.

    Side effects: adds ``len(text)`` of every marked span to the global
    ``repetion_char`` (each span is counted once, even if several comments
    land on it) and increments the global ``mark_count`` per comment.
    """
    global repetion_char
    global mark_count
    counted = set()
    for entry in list(marklist):
        para_index, sentence_index, comment = entry[1], entry[2], entry[3]
        para = docdb.paragraphs[para_index]
        if para.Range.Sentences.Count == 1:
            target = para.Range
            span_key = (para_index, None)
        else:
            target = para.Range.Sentences[sentence_index]
            span_key = (para_index, sentence_index)

        txt = target.Text
        txt = txt.strip().rstrip(chr(13) + '\n')
        txt = txt.rstrip(chr(13) + '\x07')
        txt = txt.rstrip(chr(13))
        txt = txt.rstrip('\x01')
        if not txt:
            # Nothing to anchor a comment on; Characters[len(txt) - 1]
            # would be an invalid index.
            print('跳过空文本,未标记')
            continue

        if span_key not in counted:
            # Count each span once so the overall ratio cannot be inflated
            # by one sentence matching several reference sentences.
            counted.add(span_key)
            repetion_char += len(txt)

        docdb.Comments.Add(
            Range=docdb.Range(Start=target.Characters[0].Start,
                              End=target.Characters[len(txt) - 1].End),
            Text=comment)
        mark_count = mark_count + 1
        print('已标记' + str(mark_count) + '处')
    docdb.Save()
-
-
def compare(doc1, doc2, start, end, result):
    """Worker: compare paragraphs ``start``..``end - 1`` of ``doc1`` with ``doc2``.

    For each checked paragraph the reference paragraphs are tried in order;
    the entries of the first one that yields a repetition are appended to
    ``result`` and the remaining reference paragraphs are skipped.

    ``result`` is a list shared between the worker processes. All entries
    of one paragraph are added with a single ``extend`` call. The previous
    ``with Lock():`` created a brand-new lock on every use and therefore
    synchronised nothing, so it has been removed.
    """
    for i in range(start, end):
        for j in range(len(doc2)):
            rp = compareParagraph(doc1, i, doc2, j)
            if rp[0]:
                print(str(i))
                result.extend(rp[1:])
                break
-
-
def wordcompare(src_file, db_file, dst_dir):
    """Compare ``src_file`` against ``db_file`` and write an annotated copy.

    A copy of ``src_file`` named ``对比结果<ext>`` is created in ``dst_dir``.
    Both documents are read through Word, the paragraphs of the source are
    compared in two worker processes, every repeated sentence in the copy
    gets a Word comment, and an overall similarity line (repeated
    characters / total characters of the source) is inserted at the top.

    Raises ``RuntimeError`` when a worker process exits abnormally, so that
    incomplete results are not reported as a finished comparison.
    """
    result_path = os.path.join(dst_dir, '对比结果' + os.path.splitext(src_file)[1])
    shutil.copy(src_file, result_path)

    word = Dispatch('Word.Application')
    try:
        word.Visible = 0
        doc1, _ = readDocx(src_file, word)
        doc2, _ = readDocx(db_file, word)
    finally:
        # Do not leave a hidden Word process behind if loading fails.
        word.Quit()

    # Count the source characters directly instead of relying on the
    # module-level counter, which is shared across documents and runs.
    charcount = sum(len(s) for p in doc1 for s in p)

    print('开始对比...'.center(80, '*'))
    t1 = datetime.datetime.now()
    half = len(doc1) // 2
    with Manager() as manager:
        result = manager.list()
        processes = [
            Process(target=compare, args=(doc1, doc2, 0, half, result)),
            Process(target=compare, args=(doc1, doc2, half, len(doc1), result)),
        ]

        # Start the workers, then wait for all of them.
        for p in processes:
            p.start()
        for p in processes:
            p.join()

        if any(p.exitcode != 0 for p in processes):
            raise RuntimeError('对比进程异常退出')
        # Copy out of the proxy before the manager process shuts down.
        matches = list(result)

    print('共有' + str(len(matches)) + '处相似')
    word1 = Dispatch('Word.Application')
    try:
        word1.Visible = 0
        docdb = word1.Documents.Open(FileName=result_path, Encoding='gbk')

        # mark() adds to a module-level counter; use the difference so a
        # second comparison in the same session is not skewed by the first.
        counted_before = repetion_char
        mark(docdb, matches)
        repeated = repetion_char - counted_before

        cosine = repeated / charcount if charcount else 0.0
        myRange = docdb.Range(0, 0)
        myRange.InsertBefore('整体相似度:' + '%.2f' % (cosine * 100) + '%\n')
        docdb.Save()
        docdb.Close()
    finally:
        # 0 == wdDoNotSaveChanges: on the normal path everything is already
        # saved and closed; on failure this avoids an invisible save prompt.
        word1.Quit(SaveChanges=0)
    t2 = datetime.datetime.now()
    print('\n对比完成,总用时: ', t2 - t1)
-
-
if __name__ == '__main__':
    # The file has no explicit `import sys` at the top; presumably the name
    # only leaked in through one of the star imports. Import it here so the
    # entry point does not depend on that.
    import sys

    # Required for multiprocessing in a frozen Windows executable.
    multiprocessing.freeze_support()
    app = QApplication(sys.argv)
    myshow = MainWindow()
    myshow.show()
    sys.exit(app.exec_())
- # wordcompare('F://2024//07//文章对比查重//6月20日-甘孜抽水蓄能电站预可阶段测绘技术服务(中水、永鸿、田册)招标//甘孜抽水蓄能电站预可阶段'
- # '测绘技术服务技术文件2024.6.18-测绘公司.docx',
- # 'F://2024//07//文章对比查重//6月20日-甘孜抽水蓄能电站预可阶段测绘技术服务(中水、永鸿、田册)招标//甘孜抽水蓄能电站预可阶段测绘技术服务-田册.docx',
- # 'F://2024//07//文章对比查重//')
|