from bs4 import BeautifulSoup
from operator import itemgetter, attrgetter
from difflib import SequenceMatcher
import xlwt
import xlrd
import os
import time

def get_lv(count):
    """Return the rank title for a total character count.

    Tiers are checked from lowest to highest; the first tier whose
    inclusive upper bound is at least ``count`` wins. Counts above the
    last bound (5000) get the top title.
    """
    # (inclusive upper bound, title), in ascending order of bound.
    tiers = (
        (300, '种地'),
        (320, '种植'),
        (340, '种田'),
        (360, '种作'),
        (380, '耕地'),
        (400, '耕种'),
        (450, '耕田'),
        (500, '耕作'),
        (1000, '耕耘'),
        (1050, '开荒'),
        (1100, '开垦'),
        (1150, '垦荒'),
        (1200, '拓荒'),
        (1300, '垦植'),
        (1400, '栽种'),
        (1500, '栽植'),
        (1600, '培植'),
        (1700, '栽培'),
        (1800, '莳植'),
        (2000, '农耕'),
        (2300, '解甲归田'),
        (2600, '稼穑艰难'),
        (2900, '火耕流种'),
        (3500, '精耕细作'),
        (5000, '耕耘树艺'),
    )
    return next(
        (title for upper, title in tiers if count <= upper),
        '橐驼之技',
    )

# Load the saved page and parse it.
with open('html.txt', encoding='UTF-8') as f:
    html_file = f.read()
# Turn every spelling of the line-break tag into a plain newline before
# parsing (presumably so each answer stays a single text node, which is what
# the later `.string` lookups need -- confirm against real input).
for br_tag in ('<br/>', '<br>', '</br>'):
    html_file = html_file.replace(br_tag, '\n')
# Name the parser explicitly. Without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so the parse tree could differ between
# machines. 'html.parser' ships with Python and needs no extra install.
soup = BeautifulSoup(html_file, 'html.parser')
# One <li> with exactly this class string per question card on the page.
lis = soup.find_all('li', attrs={"class": "border-1px icon-bg index-middle bg-white index-news-li mb-10"})
#with open("Output.txt", "w",encoding='UTF-8') as text_file:
#    text_file.write(str(lis))
#text_count={}
# lists_ansers holds one list per selected question card. Each row in such a
# list is [student_name, answer_text, length_as_str, best_ratio, similar_to].
# names collects every student seen in any selected card.
lists_ansers = []
names = set()
for ms in lis:
    teacher_name = ms.find('h5', attrs={'class': 'font-16'}).string
    if teacher_name == '张子祺':
        # Cards from this author onwards are out of scope: stop scanning.
        break
    if teacher_name != '张瀚斤':
        continue
    list_ansers = []
    # student name -> that student's row, so a repeated answer updates the
    # right row. (Overwriting the last appended row would clobber another
    # student's answer whenever the repeat is not adjacent.)
    row_of = {}
    for ans in ms.find_all('li'):
        spans = ans.find_all('span')
        student_name = spans[0].string
        names.add(student_name)
        answer_text = spans[1].string
        row = row_of.get(student_name)
        if row is None:
            row = [student_name, answer_text, str(len(answer_text))]
            row_of[student_name] = row
            list_ansers.append(row)
        else:
            # Later answer from the same student replaces the earlier one.
            row[1] = answer_text
            row[2] = str(len(answer_text))
    # Similarity check: each answer is compared only with the answers that
    # appear before it, so the later of two similar answers gets flagged.
    for position, row in enumerate(list_ansers):
        best_ratio = 0.0
        similar_to = "null"
        for earlier in list_ansers[:position]:
            ratio = SequenceMatcher(None, row[1], earlier[1]).ratio()
            if ratio > best_ratio:
                best_ratio = ratio
                similar_to = earlier[0]
        row.append(best_ratio)
        row.append(similar_to)
    # NOTE: a `sorted(list_ansers, ...)` call used to sit here, but its result
    # was discarded, so it never reordered anything. The aggregation further
    # down the file looks rows up by student name, so order is irrelevant.
    lists_ansers.append(list_ansers)
#print(lists_ansers)
#text_count=dict(sorted(text_count.items(), key=lambda x: x[1], reverse=True))
#print(text_count)
# problem_info: student -> one record per question, in question order.
# A record is [length, answer_text, best_ratio, similar_to]; a question the
# student did not answer gets the placeholder [0, 'null', 0.0, "null"].
problem_info = {student: [] for student in names}
for question_rows in lists_ansers:
    answered = set()
    for student, text, length, ratio, similar_to in question_rows:
        problem_info[student].append([int(length), text, ratio, similar_to])
        answered.add(student)
    for student in names:
        if student not in answered:
            problem_info[student].append([0, 'null', 0.0, "null"])

# text_count: student -> total characters written, ordered from the highest
# total to the lowest (ties keep their original relative order).
totals = {
    student: sum(record[0] for record in records)
    for student, records in problem_info.items()
}
text_count = dict(sorted(totals.items(), key=itemgetter(1), reverse=True))

# left_count: student -> number of questions whose recorded length is 0.
left_count = {
    student: sum(1 for record in problem_info[student] if record[0] == 0)
    for student in names
}
# State carried over from the previous output.xls, if there is one:
#   last_time  - timestamp stored in the last row of the ranking sheet
#                (stays 0.0 when no previous data is available)
#   last_count - displayed name -> character total from that run
last_time = 0.0
last_count = {}

if os.path.exists("output.xls"):
    previous_book = xlrd.open_workbook(filename="output.xls")
    ranking = previous_book.sheet_by_name(sheet_name='nb指数排行榜')
    if ranking.ncols > 5:
        footer_row = ranking.nrows - 1
        last_time = float(ranking.cell_value(footer_row, 1))
        # Rows between the header and the footer are students. Column 3
        # holds the total followed by one trailing unit character, which is
        # stripped before converting to int.
        last_count.update(
            (ranking.cell_value(row, 2), int(ranking.cell_value(row, 3)[:-1]))
            for row in range(1, footer_row)
        )

# Build the ranking sheet: a header row, one row per student in text_count
# order (highest total first), then a footer row holding the timestamp.
workbook = xlwt.Workbook(encoding='utf-8')
worksheet2 = workbook.add_sheet("nb指数排行榜")
column_titles = ("排名", "段位", "姓名", "字数", "是否写完", "剩余题数", "速度（字/时）")
for column, title in enumerate(column_titles):
    worksheet2.write(0, column, title)

for rank, key in enumerate(text_count, start=1):
    # The name is shown, and looked up in last_count, without its last
    # character.
    shown_name = key[:-1]
    total = text_count[key]
    remaining = left_count[key]
    worksheet2.write(rank, 2, shown_name)
    worksheet2.write(rank, 4, '√' if remaining == 0 else '×')
    worksheet2.write(rank, 0, 'No.' + str(rank))
    worksheet2.write(rank, 1, get_lv(int(total)))
    worksheet2.write(rank, 3, str(total) + '字')
    worksheet2.write(rank, 5, str(remaining))
    if last_time != 0.0:
        # Characters gained since the previous run, scaled to per-hour.
        gained = float(total - last_count.get(shown_name, 0))
        elapsed_seconds = time.time() - last_time
        worksheet2.write(rank, 6, gained / elapsed_seconds * 3600.0)

# Footer row: the current time is stored as text so a later run can read it
# back and compute speeds.
footer_row = len(text_count) + 1
worksheet2.write(footer_row, 0, '时间(用于python计算)')
worksheet2.write(footer_row, 1, str(time.time()))
worksheet2.write(footer_row, 2, 'by DSCS2009')

# One detail sheet per student, in ranking order, then save the workbook.
for key in text_count:
    # Sheet title uses the name without its last character.
    worksheet = workbook.add_sheet(key[:-1] + '的完成情况')
    detail_titles = ("题号（从上往下数）", "是否写完", "字数", "借鉴率", "借鉴人", "内容")
    for column, title in enumerate(detail_titles):
        worksheet.write(0, column, title)
    # Row n describes question n, counted from the top of the page.
    for number, record in enumerate(problem_info[key], start=1):
        length, text, ratio, similar_to = record
        worksheet.write(number, 0, str(number))
        # A recorded length of 0 means the question was not answered.
        worksheet.write(number, 1, "×" if length == 0 else "√")
        worksheet.write(number, 2, str(length))
        # Two decimals: the old spec '0>2f' only set fill and width, so the
        # percentage was printed with the default six decimal places.
        worksheet.write(number, 3, "{:.2f}%".format(ratio * 100.0))
        worksheet.write(number, 4, similar_to)
        worksheet.write(number, 5, text)
workbook.save('output.xls')
    
