Parsing

parse.py

from os import listdir
from os.path import isfile, join

from collection.models import Document


def process_data(data):
    """Wrap *data* in a new ``Document``, save it, and print a progress line.

    Args:
        data: Value passed as the ``content`` keyword when the ``Document``
            is constructed (the caller in this file passes a file's text).
    """
    document = Document(content=data)
    document.save()
    # The message is emitted only after save() has returned.
    print("processing document", document)


def tokenize(directory="documents"):
    """Read every regular file in *directory* and pass its text to ``process_data``.

    Entries are visited in sorted name order, and the sorted listing is
    printed first. Despite the name, no tokenizing happens here: each
    file's full text is handed over unchanged.

    Args:
        directory: Folder to scan. Defaults to ``"documents"``, resolved
            relative to the current working directory.

    Raises:
        OSError: If *directory* cannot be listed or a file cannot be opened.
    """
    files = sorted(listdir(directory))
    print(files)

    for name in files:
        path = join(directory, name)
        # listdir() also returns sub-directories; open() would fail on them
        # and abort the whole run, so anything that is not a file is skipped.
        if not isfile(path):
            continue
        with open(path) as f:
            data = f.read()
        # Process after the file is closed so the handle is not held open
        # while the document is being saved.
        process_data(data)


if __name__ == "__main__":
    # Script entry point: process the document files, then report completion.
    tokenize()
    print("Done!")

  • Last modified: 2020/03/25 17:09