U
    ~&g-  ć                   @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZmZmZ d dl	Z	G dd dZ
eeddd	Zed
krdZeeZede  dS )é    N)ŚPath)Śdatetime)ŚDictŚListŚOptionalc                   @   sZ   e Zd Zdd ZedddZedddZeeedd	d
ZeedddZ	dd Z
dS )ŚSECHumanReadableParserc                 C   s<   || _ tj |”}tj |”}tj |d| dd””| _d S )NŚhuman_readablez.txtz_human_readable.txt)Ś	file_pathŚosŚpathŚdirnameŚbasenameŚjoinŚreplaceŚoutput_path)Śselfr	   Zbase_dirŚ	file_name© r   ś3/home/kwaq/raporty/src/sec_human_readable_parser.pyŚ__init__	   s    
żzSECHumanReadableParser.__init__)Śreturnc              
   C   s0   t | jddd}| ” W  5 Q R £ S Q R X d S )NŚrśutf-8©Śencoding)Śopenr	   Śread)r   Śfr   r   r   Ś
_read_file   s    z!SECHumanReadableParser._read_filec                 C   s   t  d|  ” t j”}|si S | d”}|  |d”|  |d”|  |d”|  |d”|  |d”|  |d”|  |d	”|  |d
”ddS )u$   WyciÄga informacje z nagÅĆ³wka SECz<SEC-HEADER>(.*?)</SEC-HEADER>é   zCOMPANY CONFORMED NAME:\s*(.+)zFILED AS OF DATE:\s*(.+)z!CONFORMED SUBMISSION TYPE:\s*(.+)z"CONFORMED PERIOD OF REPORT:\s*(.+)zSTREET 1:\s*(.+)zCITY:\s*(.+)zSTATE:\s*(.+)zZIP:\s*(.+))ŚulicaŚmiastoŚstanŚkod)Śnazwa_firmyŚdata_raportuŚtyp_raportuŚokres_raportuŚadres)ŚreŚsearchr   ŚDOTALLŚgroupŚ_find_value)r   Zheader_sectionZheader_textr   r   r   Ś_extract_header_info   s"     ’








üūz+SECHumanReadableParser._extract_header_info)ŚtextŚpatternr   c                 C   s"   t  ||”}|r| d” ” S dS )Nr   Ś )r)   r*   r,   Śstrip)r   r/   r0   Zmatchr   r   r   r-   -   s    z"SECHumanReadableParser._find_value)r/   r   c                 C   s2   t  dd|”}t  dd|”}t  dd|”}| ” S )z#Usuwa tagi HTML i formatowanie XBRLz<[^>]+>ś z\{.*?\}r1   z\s+)r)   Śsubr2   )r   r/   r   r   r   Ś_clean_html1   s    z"SECHumanReadableParser._clean_htmlc              	   C   s^  |   ” }g }| d” | d” | d” | d” | d| d” ” | d| d” ” | d	| d
” ” | d| d” d” | di ”}| d” | | d” ” | | d” d| d” d| d” d” | d” | d” t d|  ” tj”}|r,|  | d””}| |” t	| j
ddd}| d |”” W 5 Q R X | j
S )u"   Generuje czytelnÄ wersjÄ raportuzP================================================================================zRAPORT SEC - WERSJA CZYTELNAzQ================================================================================
zPODSTAWOWE INFORMACJE:zNazwa firmy: r$   zData raportu: r%   zTyp raportu: r&   zOkres raportu: r'   Ś
r(   zADRES SIEDZIBY:r    r!   z, r"   r3   r#   u   TREÅÄ RAPORTU:zP--------------------------------------------------------------------------------z<TEXT>(.*?)</TEXT>r   Śwr   r   )r.   ŚappendŚgetr)   r*   r   r+   r5   r,   r   r   Śwriter   )r   Zheader_infoŚoutputr(   Zmain_contentZcleaned_contentr   r   r   r   Śgenerate_human_readable8   s.    




.


z.SECHumanReadableParser.generate_human_readableN)Ś__name__Ś
__module__Ś__qualname__r   Śstrr   r   r.   r-   r5   r<   r   r   r   r   r      s   r   )r	   r   c                 C   s   t | }| ” S )z8Funkcja pomocnicza do przetwarzania pojedynczego raportu)r   r<   )r	   Zparserr   r   r   Śprocess_report^   s    rA   Ś__main__z:reports/0001385849/2024-10-16_8-K_0001062993-24-017723.txtu%   Utworzono czytelnÄ wersjÄ raportu: )r)   ZjsonŚpathlibr   r   Ztypingr   r   r   r
   r   r@   rA   r=   Śreport_pathZhuman_readable_pathŚprintr   r   r   r   Ś<module>   s   V