Python实现读取多个excel;以及统计词频;使用词典

news/2024/7/24 7:49:29 标签: python, 开发工具
# -*- coding: utf8 -*-
import xlrd
import codecs
import os
def handExcel(path):
    """Export two columns from "Sheet1" of every workbook in a directory.

    Every regular file found in *path* is opened with xlrd.  For each row
    after the first one (row 0 is skipped), the values of columns 3 and 4
    (0-based) are written, one per line, to "7.8_userQue.txt" and
    "7.8_StandQue.txt" respectively.  Both output files are created in the
    current working directory and are overwritten on every call.

    Workbooks that have no sheet named "Sheet1" are reported and skipped,
    and sub-directories of *path* are ignored.  The working directory is
    not changed, so a relative *path* is resolved correctly.

    Args:
        path: Directory containing the Excel workbooks to read.
    """
    with codecs.open("7.8_userQue.txt", "w", "utf-8") as output1, \
            codecs.open("7.8_StandQue.txt", "w", "utf-8") as output2:
        # Only regular files can be workbooks; a sub-directory would make
        # xlrd.open_workbook fail.
        names = [
            name for name in os.listdir(path)
            if os.path.isfile(os.path.join(path, name))
        ]

        for j, name in enumerate(names, 1):
            print("读取第%d个excel" % j)
            print(name)
            # Join with the directory instead of chdir-ing into it, so the
            # process-wide working directory stays as the caller left it.
            bk = xlrd.open_workbook(os.path.join(path, name))
            try:
                sh = bk.sheet_by_name("Sheet1")
            except xlrd.XLRDError:
                # Without this `continue` the code below would use an
                # undefined (or stale, from the previous workbook) sheet.
                print("no sheet in %s named Sheet1" % name)
                continue

            nrows = sh.nrows
            ncols = sh.ncols
            print("nrows %d, ncols %d" % (nrows, ncols))
            for i in range(1, nrows):
                # "%s" formatting keeps text cells unchanged and also
                # accepts numeric cells, which cannot be concatenated
                # with "\n" directly.
                output1.write("%s\n" % sh.cell_value(i, 3))
                output2.write("%s\n" % sh.cell_value(i, 4))
if __name__ == "__main__":
    # Run the export only when this file is executed as a script, so that
    # importing the module does not trigger file I/O.
    handExcel("D:/Users/cassie.xiao/PycharmProjects/read_excel/three")



---------------------------------------统计词频----------------------------------
# -*- coding: utf8 -*-

import codecs
def getfreq(freqdict, filename="xiaoi_userQue_seg_hanlp.txt"):
    """Add the token counts of a segmented text file to *freqdict*.

    The file is read as UTF-8 and every line is split on whitespace; each
    resulting token increments its entry in *freqdict*.

    Args:
        freqdict: Mapping of token -> count.  It is updated in place, so
            existing counts are kept and extended.
        filename: Path of the segmented text file to read.  Defaults to
            the file name the script has always used.

    Returns:
        The same *freqdict* object that was passed in.
    """
    print("getfreq....")
    with codecs.open(filename, "r", "utf-8") as seg_file:
        for line in seg_file:
            # split() without an argument drops the trailing newline and
            # never yields empty tokens; split(" ") counted "word\n" and
            # "word" as different words and counted "" for double spaces.
            for word in line.split():
                freqdict[word] = freqdict.get(word, 0) + 1
    return freqdict

def sort_out(dic, outfilename):
    """Write the entries of *dic* to a file, highest count first.

    Each entry becomes one UTF-8 line of the form "key:value".  Entries
    with equal values keep the order in which *dic* yields them.

    Args:
        dic: Mapping of word -> count.
        outfilename: Path of the file to create (overwritten if present).
    """
    print("sort....")
    ranked = list(dic.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    print("out....")
    with codecs.open(outfilename, 'w', 'utf-8') as out_file:
        for word, count in ranked:
            out_file.write(word + ":" + str(count) + "\n")
def main():
    """Build the word-frequency table and write it out sorted by count."""
    # Start from an empty table; getfreq fills it and hands it back.
    counts = getfreq({})
    sort_out(counts, "xiaoi_userQue_seg_hanlp_freq.txt")


if __name__ == "__main__":
    main()

--------------------------------------使用词典操作-------------------------------------
# -*- coding: utf8 -*-

import codecs
def _load_freq_table(filename):
    """Read "word:freq" lines from *filename* into a dict of word -> freq."""
    table = {}
    with codecs.open(filename, "r", "utf-8") as freq_file:
        for line in freq_file:
            if line.startswith(":"):
                continue
            # Split on the LAST colon so that words which themselves
            # contain ":" keep their real frequency; lines without any
            # colon are skipped instead of raising IndexError.
            word, sep, freq = line.strip().rpartition(":")
            if sep:
                table[word] = freq
    return table


def _write_noun_freq(nouns, table, filename):
    """Write "noun:freq" to *filename* for every noun present in *table*."""
    with codecs.open(filename, "w", "utf-8") as out_file:
        for noun in nouns:
            if noun in table:
                out_file.write(noun + ":" + table[noun] + "\n")


def getfreq():
    """Extract the frequencies of dictionary nouns from two frequency files.

    Reads the noun list "noun.txt" (one noun per line) and the frequency
    files "300W_xiaoi_jieba_UQ.txt" and "300W_xiaoi_jieba_SQ.txt" (lines
    of the form "word:freq"; lines starting with ":" are ignored).  For
    each noun, in noun-list order, that occurs in a frequency file, a
    "noun:freq" line is written to "freq_xiaoi&Noun_userQ.txt" and
    "freq_xiaoi&Noun_standQ.txt" respectively.  All files are UTF-8 and
    are resolved relative to the current working directory.

    All inputs are read before any output file is created, so a missing
    input does not leave truncated outputs behind.  The per-line debug
    prints of the earlier version are no longer emitted.
    """
    print("getfreq....")
    with codecs.open("noun.txt", "r", "utf-8") as noun_file:
        nouns = [line.strip() for line in noun_file]

    user_table = _load_freq_table(r"300W_xiaoi_jieba_UQ.txt")
    stand_table = _load_freq_table(r"300W_xiaoi_jieba_SQ.txt")

    _write_noun_freq(nouns, user_table, r"freq_xiaoi&Noun_userQ.txt")
    _write_noun_freq(nouns, stand_table, r"freq_xiaoi&Noun_standQ.txt")
if __name__ == "__main__":
    # Run the extraction only when this file is executed as a script, so
    # that importing the module does not touch any files.
    getfreq()

转载于:https://www.cnblogs.com/maowuyu-xb/p/7421495.html


http://www.niftyadmin.cn/n/1585484.html

相关文章

【python】终于解决了cv2.VideoWriter生成视频后视频文件始终为1KB且无法播放的问题

把图片序列合成视频: import cv2; import imageio; import os; path = r'D:\dataset\images'; dir_name = os.listdir(path); for dir in dir_name: dir_path = os.path.join(path, dir); img = imageio.imread(os.path.join(dir_path, os.listdir(dir_path)[0])); vid_writer = cv2.…

程序员大调查:年薪普遍超过30万 男性多半单身!

今天给大家分享的是一组程序员大调查的数据,数据表明程序员年薪普遍超过30万,但是男性大多数是单身!哈哈,看看你有没有中枪吧!这里还是要推荐下我自己建的前端学习群:659247717,如果你正在学习前端…

为什么深度学习中一般使用mean=[0.485, 0.456, 0.406]和std=[0.229, 0.224, 0.225]来归一化图像?

Q:图像预处理部分在resize后还会使用均值:image_mean=[0.485,0.456,0.406],标准差:image_std=[0.229,0.224,0.225]进行归一化,为什么使用这几个值? A:image_mean=[0.485,0.456,0.406]、image_std=[…

我所认知的 ANT DESIGN

一、引言 由于工作的关系对于 React 并没有过度的深入,因此对 Ant Design 并无太多了解。当然也是因为工作中存在大量的 Angular 项目,后来 ng-zorro-antd 的发布引起我极大的关注。 当我决定基于 ng-zorro-antd 发布一款企业后台管理模板 ng-alain 之前…

【SOT】SiamFC代码笔记

代码来源:https://github.com/huanglianghua/siamfc-pytorch 结合siamfc-pytorch代码讲解的三篇博客 下面是阅读上述SiamFC代码时的笔记 preprocess data(数据预处理) dataset.py 概括:通过index索引返回item (z, x, box_z, box_x),然后经…

wcf服务契约的重载

a. 服务端 1.服务端 契约用OperationContract的Name实现重载 using System; using System.Collections.Generic; using System.Linq; using System.Runtime.Serialization; using System.ServiceModel; using System.Text;namespace WCF.Chapter2.Overloading.Host{[ServiceCon…

【SOT】SiamRPN代码笔记

代码来源:https://github.com/laisimiao/siamrpn.pytorch 结合博客关于SiamRPN代码的一些要点 下面是阅读上述SiamRPN代码时的笔记 1、template frame和detection frame经过相同的Siamese Network得到一个feature,然后经过RPN的classification branch和regression b…

android使用Pull解析来自服务器的xml文件时出现错误以及解决方案

直接上代码,代码中有详细注释: public class CheckUpdateManager { private static final String TAG = "CheckUpdateManager"; private ProgressDialog mWaitDialog; private Context mContext; private boolean mIsSho…