# -*- coding: utf-8 -*-
import os, sort
def add_books(name, books):
with open(name, "r") as f:
text = f.read()
category = text[text.find("") + 4:text.find("")]
author_ind = text.find("")
if author_ind != -1:
author = text[author_ind + 18:text.find("")]
else:
author_ind = text.find("")
author = text[author_ind + 16:text.find("")]
publ_in = text[text.find("") + 9:text.find("")]
title = text[text.find("
") + 7:text.find("")]
if author != "":
string = author.decode("utf-8") + ". "
else:
string = "Без автора".decode("utf-8") + ". "
if publ_in != "":
string += publ_in.decode("utf-8") + ": "
string += title.decode("utf-8")
books[mapping[category]].append(sort.Word(string.encode("utf-8")))
if __name__ == '__main__':
CATS = "ABCDEFGHI"
PATH_IN = "../data/good/"
PATH_OUT = "../aux/"
mapping = {"A":0, "B":1, "C":2, "D":3, "E":4, "F":5, "G":6, "H":7, "I":8}
books = [[] for i in CATS]
for name in os.listdir(PATH_IN):
if name.endswith(".txt") and name[0] in CATS and name[1] == "_":
add_books(PATH_IN + name, books)
for i in CATS:
sort.quicksort(books[mapping[i]], 0, len(books[mapping[i]]) - 1)
with open(PATH_OUT + "!_white_list.txt", "w") as out:
for i in CATS:
out.write("-----------------------------\n")
if i == "A":
out.write(u"Преса:".encode("utf-8"))
elif i == "B":
out.write(u"Релігійна література:".encode("utf-8"))
elif i == "C":
out.write(u"Професійно-популярна література:".encode("utf-8"))
elif i == "D":
out.write(u"«Естетичні інформативні» тексти:".encode("utf-8"))
elif i == "E":
out.write(u"Адміністративні документи:".encode("utf-8"))
elif i == "F":
out.write(u"Науково-популярна література:".encode("utf-8"))
elif i == "G":
out.write(u"Наукова література:".encode("utf-8"))
elif i == "H":
out.write(u"Навчальна література:".encode("utf-8"))
elif i == "I":
out.write(u"Художня література:".encode("utf-8"))
out.write("\n-----------------------------\n")
if len(books[mapping[i]]) > 0:
out.write(books[mapping[i]][0].word)
out.write("\n")
for j in range(1, len(books[mapping[i]])):
if j != 0 and books[mapping[i]][j].word != books[mapping[i]][j - 1].word:
out.write(books[mapping[i]][j].word)
out.write("\n")
out.write("-----------------------------")