#!/usr/bin/python
#-*- coding:cp936 -*-
import re;
import os;
import sys;
# Matches whitespace at the very start or very end of a string (including the
# line terminator); substituting '' therefore trims both ends.
_EDGE_WHITESPACE = re.compile(r'(^\s+|\s+$)')

# Column separator used in the output file.
_DELIMITER = '\t'


def _trim(text):
    """Return *text* without leading/trailing whitespace."""
    return _EDGE_WHITESPACE.sub('', text)


def _read_lines(path):
    """Return every line of *path*, in order, without its trailing newline."""
    with open(path, 'r') as handle:
        return [line.rstrip('\n') for line in handle]


def _index_first_occurrence(lines):
    """Map each distinct line to the 0-based position where it first appears."""
    first_seen = {}
    for position, line in enumerate(lines):
        first_seen.setdefault(line, position)
    return first_seen


def _line_below(lines, index, keyword, offset, path):
    """Return the trimmed line *offset* lines below the first line equal to *keyword*.

    Raises LookupError when the keyword is absent from *path* or when the
    requested line lies past the end of the file.
    """
    position = index.get(keyword)
    if position is None:
        raise LookupError('keyword %r not found in %s' % (keyword, path))
    target = position + offset
    if target >= len(lines):
        raise LookupError(
            'no line %d below keyword %r in %s' % (offset, keyword, path))
    return _trim(lines[target])


def main(argv, dict_path='dict.dat', contingency_path='contigency.dat'):
    """Build one tab-separated output row per keyword.

    argv follows the sys.argv layout:
        argv[1]  file with one keyword per line
        argv[2]  weight file
        argv[3]  output file (overwritten)

    For every non-blank keyword:
      * the line directly below the keyword's line in *dict_path* is taken
        as the number of documents the keyword indexes;
      * the first space-separated token of the line two below the keyword's
        line in the weight file is taken as the keyword's class;
      * the first line of *contingency_path* starting with
        "<keyword> <class>" is split on single spaces, the document count is
        appended, and the fields are written tab-joined to the output file
        and echoed to stdout.

    All matching is literal and done in-process: keywords are never handed
    to a shell or interpreted as regular expressions.

    Raises LookupError if a keyword (or its row) cannot be found, and
    SystemExit with a usage message if arguments are missing.
    """
    if len(argv) < 4:
        raise SystemExit(
            'usage: %s <keyword_file> <weight_file> <output_file>' % argv[0])
    keyword_path, weight_path, output_path = argv[1], argv[2], argv[3]

    with open(keyword_path, 'r') as source:
        keywords = [_trim(raw) for raw in source]

    # Each lookup file is read and indexed once instead of being rescanned
    # for every keyword.
    dict_lines = _read_lines(dict_path)
    dict_index = _index_first_occurrence(dict_lines)
    weight_lines = _read_lines(weight_path)
    weight_index = _index_first_occurrence(weight_lines)
    contingency_lines = _read_lines(contingency_path)

    with open(output_path, 'w') as sink:
        for keyword in keywords:
            if not keyword:
                # A blank input line carries no keyword to look up.
                continue

            total_indexed = _line_below(
                dict_lines, dict_index, keyword, 1, dict_path)
            class_line = _line_below(
                weight_lines, weight_index, keyword, 2, weight_path)
            keyword_class = class_line.split(' ')[0]

            # Prefix match on purpose: the row carries further fields after
            # "<keyword> <class>".
            prefix = '%s %s' % (keyword, keyword_class)
            row = next(
                (line for line in contingency_lines if line.startswith(prefix)),
                None)
            if row is None:
                raise LookupError(
                    'no row starting with %r in %s' % (prefix, contingency_path))

            fields = _trim(row).split(' ')
            fields.append(total_indexed)
            output_line = _DELIMITER.join(fields)
            sink.write(output_line)
            sink.write('\n')
            print(output_line)

    print('%s has finished, congratulations!' % argv[0])


if __name__ == "__main__":
    main(sys.argv)