37 lines
1.6 KiB
Python
37 lines
1.6 KiB
Python
import os.path
|
|
import lxml
|
|
from lxml import etree
|
|
from modules.document.document_file import ExtractedDocument
|
|
from modules.helpers import xml
|
|
from modules.helpers.convert import Int
|
|
from datetime import datetime
|
|
|
|
|
|
class DocumentProps:
    """Properties read from an extracted document's core and app XML parts.

    Every instance exposes the same attributes. ``parsed_`` tells whether the
    XML parts were actually read:

    * ``parsed_ is False`` -- ``extracted_document.documentroot`` was empty or
      not an existing directory; all property attributes are ``None``.
    * ``parsed_ is True`` -- the attributes hold the values looked up in the
      two parsed XML trees.
    """

    # Timestamp layout used to parse dcterms:created / dcterms:modified.
    # The trailing 'Z' is matched literally, so the resulting datetimes are
    # naive (no tzinfo attached).
    _TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

    def __init__(self, extracted_document: "ExtractedDocument"):
        """Read the document properties of *extracted_document*.

        Args:
            extracted_document: Object providing ``documentroot`` (checked
                with ``os.path.isdir``) plus ``core`` and ``app``, which are
                handed to ``etree.parse`` (presumably paths to the core/app
                property XML files -- confirm against ExtractedDocument).

        Raises:
            Whatever ``etree.parse`` raises for an unreadable or malformed
            XML source, and whatever ``datetime.strptime`` raises when a
            created/modified value does not match ``_TIMESTAMP_FORMAT``.
        """
        # Give every attribute a defined value up front so that an instance
        # that could not be parsed still answers attribute reads with None
        # instead of raising AttributeError.
        self.extracted_document = extracted_document
        self.parsed_ = False
        self.core_xml = None
        self.app_xml = None
        self.application = None
        self.paragraphs = None
        self.lines = None
        self.characters = None
        self.words = None
        self.pages = None
        self.total_time = None
        self.template = None
        self.modified = None
        self.created = None
        self.revision = None
        self.last_modified_by = None
        self.creator = None

        root = extracted_document.documentroot
        if not root or not os.path.isdir(root):
            # Nothing to read: leave the None defaults and parsed_ == False.
            return

        self.parsed_ = True

        core_xml = etree.parse(extracted_document.core)
        app_xml = etree.parse(extracted_document.app)

        # Values looked up in the app XML tree; the counters go through the
        # project's Int helper.
        self.application = xml.get_value(app_xml, '//Application')
        self.paragraphs = Int(xml.get_value(app_xml, '//Paragraphs'))
        self.lines = Int(xml.get_value(app_xml, '//Lines'))
        self.characters = Int(xml.get_value(app_xml, '//Characters'))
        self.words = Int(xml.get_value(app_xml, '//Words'))
        self.pages = Int(xml.get_value(app_xml, '//Pages'))
        self.total_time = Int(xml.get_value(app_xml, '//TotalTime'))
        self.template = xml.get_value(app_xml, '//Template')

        # Values looked up in the core XML tree.
        # NOTE(review): the prefixed names (dcterms:, cp:, dc:) rely on
        # xml.get_value resolving namespaces -- confirm in modules.helpers.xml.
        # NOTE(review): strptime fails if get_value yields no string for a
        # missing element -- confirm what get_value returns in that case.
        self.modified = datetime.strptime(
            xml.get_value(core_xml, 'dcterms:modified'), self._TIMESTAMP_FORMAT)
        self.created = datetime.strptime(
            xml.get_value(core_xml, 'dcterms:created'), self._TIMESTAMP_FORMAT)
        self.revision = xml.get_value(core_xml, 'cp:revision')
        self.last_modified_by = xml.get_value(core_xml, 'cp:lastModifiedBy')
        self.creator = xml.get_value(core_xml, 'dc:creator')

        # Keep the parsed trees available for further lookups.
        self.core_xml = core_xml
        self.app_xml = app_xml
|