├── requirements.txt
├── .gitattributes
├── install_reqs.bat
├── udf_to_pdf.bat
├── docx_to_udf.bat
├── udf_to_docx.bat
├── scanned_pdf_to_udf.bat
├── docx_to_udf.py
├── image_processor.py
├── main.py
├── README.md
├── table_processor.py
├── utils.py
├── scanned_pdf_to_udf.py
├── .gitignore
├── paragraph_processor.py
├── udf_to_md.py
├── Docs.md
├── udf_to_pdf.py
└── udf_to_docx.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | python-docx
2 | PyMuPDF
3 | Pillow


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/install_reqs.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | :: This script installs the packages listed in requirements.txt
 3 | 
 4 | REM Check if requirements file exists
 5 | IF NOT EXIST requirements.txt (
 6 |     echo Requirements file not found!
 7 |     exit /b 1
 8 | )
 9 | 
10 | REM Install the packages using pip
11 | pip install -r requirements.txt
12 | 
13 | REM Check if the installation was successful
14 | IF %ERRORLEVEL% NEQ 0 (
15 |     echo Failed to install some packages.
16 |     exit /b 1
17 | )
18 | 
19 | echo Packages installed successfully.
20 | pause


--------------------------------------------------------------------------------
/udf_to_pdf.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | :: This script converts a UDF file to PDF format
 3 | 
 4 | REM Check if a UDF file is provided by dragging
 5 | IF "%~1"=="" (
 6 |     echo Please drag a UDF file onto this script to convert it to PDF.
 7 |     pause
 8 |     exit /b 1
 9 | )
10 | 
11 | REM Run the conversion
12 | python udf_to_pdf.py "%~1"
13 | 
14 | REM Check if the conversion was successful
15 | IF %ERRORLEVEL% NEQ 0 (
16 |     echo Failed to convert UDF to PDF.
17 |     pause
18 |     exit /b 1
19 | )
20 | 
21 | echo UDF successfully converted to PDF.
22 | pause
23 | 


--------------------------------------------------------------------------------
/docx_to_udf.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | :: This script converts a DOCX file to UDF format
 3 | 
 4 | REM Check if a DOCX file is provided by dragging
 5 | IF "%~1"=="" (
 6 |     echo Please drag a DOCX file onto this script to convert it to UDF.
 7 |     pause
 8 |     exit /b 1
 9 | )
10 | 
11 | REM Run the conversion
12 | python docx_to_udf.py "%~1"
13 | 
14 | REM Check if the conversion was successful
15 | IF %ERRORLEVEL% NEQ 0 (
16 |     echo Failed to convert DOCX to UDF.
17 |     pause
18 |     exit /b 1
19 | )
20 | 
21 | echo DOCX successfully converted to UDF.
22 | pause
23 | 


--------------------------------------------------------------------------------
/udf_to_docx.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | :: This script converts a UDF file to DOCX format
 3 | 
 4 | REM Check if a UDF file is provided by dragging
 5 | IF "%~1"=="" (
 6 |     echo Please drag a UDF file onto this script to convert it to DOCX.
 7 |     pause
 8 |     exit /b 1
 9 | )
10 | 
11 | REM Run the conversion
12 | python udf_to_docx.py "%~1"
13 | 
14 | REM Check if the conversion was successful
15 | IF %ERRORLEVEL% NEQ 0 (
16 |     echo Failed to convert UDF to DOCX.
17 |     pause
18 |     exit /b 1
19 | )
20 | 
21 | echo UDF successfully converted to DOCX.
22 | pause
23 | 


--------------------------------------------------------------------------------
/scanned_pdf_to_udf.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | :: This script converts a scanned PDF file to UDF format
 3 | 
 4 | REM Check if a PDF file is provided by dragging
 5 | IF "%~1"=="" (
 6 |     echo Please drag a PDF file onto this script to convert it to UDF.
 7 |     pause
 8 |     exit /b 1
 9 | )
10 | 
11 | REM Run the conversion
12 | python scanned_pdf_to_udf.py "%~1"
13 | 
14 | REM Check if the conversion was successful
15 | IF %ERRORLEVEL% NEQ 0 (
16 |     echo Failed to convert scanned PDF to UDF.
17 |     pause
18 |     exit /b 1
19 | )
20 | 
21 | echo Scanned PDF successfully converted to UDF.
22 | pause
23 | 


--------------------------------------------------------------------------------
/docx_to_udf.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | from main import main
 4 | 
 5 | def docx_to_udf():
 6 |     if len(sys.argv) < 2:
 7 |         print("Usage: python main.py input.docx")
 8 |         sys.exit(1)
 9 | 
10 |     input_file = sys.argv[1]
11 | 
12 |     if not os.path.isfile(input_file):
13 |         print(f"Input file not found: {input_file}")
14 |         sys.exit(1)
15 | 
16 |     filename, ext = os.path.splitext(input_file)
17 | 
18 |     if ext.lower() == '.docx':
19 |         udf_file = filename + '.udf'
20 |         main(input_file, udf_file)
21 |     else:
22 |         print("Please provide a .docx file.")
23 |         sys.exit(1)
24 | 
25 | if __name__ == '__main__':
26 |     docx_to_udf()
27 | 


--------------------------------------------------------------------------------
/image_processor.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | from docx.oxml.ns import qn
 3 | from PIL import Image
 4 | import io
 5 | 
 6 | def process_image(drawing, document):
 7 |     try:
 8 |         inline = drawing.find('.//wp:inline', namespaces={'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'})
 9 |         anchor = drawing.find('.//wp:anchor', namespaces={'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'})
10 |         
11 |         extent = None
12 |         if inline is not None:
13 |             extent = inline.find('.//wp:extent', namespaces={'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'})
14 |         elif anchor is not None:
15 |             extent = anchor.find('.//wp:extent', namespaces={'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'})
16 |         
17 |         if extent is not None:
18 |             width = int(extent.get('cx')) // 9525
19 |             height = int(extent.get('cy')) // 9525
20 |         else:
21 |             width = height = 100
22 | 
23 |         blip = drawing.find('.//a:blip', namespaces={'a': 'http://schemas.openxmlformats.org/drawingml/2006/main'})
24 |         if blip is not None:
25 |             rId = blip.get(qn('r:embed'))
26 |             if rId in document.part.rels:
27 |                 image_part = document.part.rels[rId].target_part
28 |                 image_bytes = image_part.blob
29 |                 
30 |                 try:
31 |                     with Image.open(io.BytesIO(image_bytes)) as img:
32 |                         png_buffer = io.BytesIO()
33 |                         img.save(png_buffer, format='PNG')
34 |                         png_bytes = png_buffer.getvalue()
35 |                         image_data = base64.b64encode(png_bytes).decode('utf-8')
36 |                 except Exception:
37 |                     image_data = base64.b64encode(image_bytes).decode('utf-8')
38 |                 
39 |                 return image_data, width, height
40 | 
41 |     except Exception:
42 |         pass
43 |     
44 |     return None, None, None


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import zipfile
 2 | from docx import Document
 3 | from paragraph_processor import process_paragraph
 4 | from table_processor import process_table
 5 | 
 6 | def main(docx_file, udf_file):
 7 |     udf_template = '''<?xml version="1.0" encoding="UTF-8" ?>
 8 | <template format_id="1.8">
 9 | <content><![CDATA[{content}]]></content>
10 | <properties><pageFormat mediaSizeName="1" leftMargin="42.51968479156494" rightMargin="28.34645652770996" topMargin="14.17322826385498" bottomMargin="14.17322826385498" paperOrientation="1" headerFOffset="20.0" footerFOffset="20.0" /></properties>
11 | <elements resolver="hvl-default">
12 | {elements}
13 | </elements>
14 | <styles><style name="default" description="Geçerli" family="Dialog" size="12" bold="false" italic="false" foreground="-13421773" FONT_ATTRIBUTE_KEY="javax.swing.plaf.FontUIResource[family=Dialog,name=Dialog,style=plain,size=12]" /><style name="hvl-default" family="Times New Roman" size="12" description="Gövde" /></styles>
15 | </template>'''
16 | 
17 |     try:
18 |         document = Document(docx_file)
19 |     except Exception as e:
20 |         print(f"Error loading DOCX file: {e}")
21 |         return
22 | 
23 |     content = []
24 |     elements = []
25 |     current_offset = 0
26 |     EMPTY_PARAGRAPH_PLACEHOLDER = '\u200B'  # Zero-width space
27 | 
28 |     for element in document.element.body:
29 |         if element.tag.endswith('p'):  # Paragraph
30 |             para_text, para_elements = process_paragraph(element, document, current_offset)
31 |             elements.append(para_elements)
32 |             content.append(para_text)
33 |             current_offset += len(para_text)
34 |         elif element.tag.endswith('tbl'):  # Table
35 |             table_text, table_element = process_table(element, document, current_offset)
36 |             elements.append(table_element)
37 |             content.append(table_text)
38 |             current_offset += len(table_text)
39 | 
40 |     # Ensure there's at least one paragraph after the table
41 |     if not content:
42 |         content.append(EMPTY_PARAGRAPH_PLACEHOLDER)
43 |         elements.append(f'<paragraph Alignment="0" LeftIndent="0.0" RightIndent="0.0"><content startOffset="{current_offset}" length="1" /></paragraph>')
44 | 
45 |     udf_content = udf_template.format(
46 |         content=''.join(content),
47 |         elements='\n'.join(elements)
48 |     )
49 | 
50 |     try:
51 |         with zipfile.ZipFile(udf_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
52 |             zipf.writestr('content.xml', udf_content)
53 |         print(f"UDF file created successfully: {udf_file}")
54 |     except Exception as e:
55 |         print(f"Error creating UDF file: {e}")
56 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # UDF Toolkit
 2 |  UYAP UDF dosya formatı ile ilgili çalışmalar
 3 | 
 4 | [![Star History Chart](https://api.star-history.com/svg?repos=saidsurucu/udf-toolkit&type=Date)](https://www.star-history.com/#saidsurucu/udf-toolkit&Date)
 5 | 
 6 | ## UDF dosyasını DOCX formatına çevirmek için
 7 | ```
 8 | python udf_to_docx.py input.udf
 9 | ```
10 | ## UDF dosyasını PDF formatına çevirmek için
11 | ```
12 | python udf_to_pdf.py input.udf
13 | ```
14 | ## DOCX dosyasını UDF formatına çevirmek için
15 | ```
16 | python docx_to_udf.py input.docx
17 | ```
18 | Not: En iyi sonucu almak için Windows'ta çalıştırılmalıdır. Bazı DOCX özelliklerini dönüştürmek için Windows kütüphaneleri gereklidir. MacOS ve Linux'ta sonuçlar farklı olabilir.
19 | ## PDF dosyasını (imaj olarak) UDF formatına çevirmek için
20 | ```
21 | python scanned_pdf_to_udf.py input.pdf
22 | ```
23 | # Teknik Bilgiye Sahip Olmayanlar İçin Windows'ta Kullanım Talimatları
24 | 
25 | Bu scriptlerin düzgün çalışabilmesi için Python'un sisteminizde kurulu olması gerekmektedir. Aşağıdaki adımları takip ederek Python'u yükleyebilirsiniz:
26 | 
27 | 1. [Python'un resmi web sitesine](https://www.python.org/downloads/) gidin.
28 | 2. Sisteminizin işletim sistemine uygun Python sürümünü indirin (genellikle en son sürüm önerilir).
29 | 3. Kurulum sırasında "Add Python to PATH" seçeneğini işaretleyin.
30 | 
31 | ## Kodu İndirmek
32 | Sağ üstteki yeşil renkli `Code` butonuna tıklayın. `Download ZIP`'e tıklayın. İnen sıkıştırılmış ZIP dosyasını bir klasöre çıkartın.
33 | 
 34 | ### 1. `install_reqs.bat`
 35 | - **Amaç**: `requirements.txt` dosyasında listelenen gerekli Python paketlerini yükler.
 36 | - **Nasıl Kullanılır**: `install_reqs.bat` scriptine çift tıklayın. Bu, `requirements.txt` dosyasında belirtilen tüm gerekli bağımlılıkları yükleyecektir.
37 | 
38 | ### 1. `udf_to_docx.bat`
39 | - **Amaç**: UDF dosyasını DOCX formatına dönüştürür.
40 | - **Nasıl Kullanılır**: `.udf` dosyasını `udf_to_docx.bat` scriptinin üzerine sürükleyin. Script çalışacak ve girdi ile aynı dizinde bir `.docx` dosyası oluşturacaktır.
41 | 
42 | ### 2. `udf_to_pdf.bat`
43 | - **Amaç**: UDF dosyasını PDF formatına dönüştürür.
44 | - **Nasıl Kullanılır**: `.udf` dosyasını `udf_to_pdf.bat` scriptinin üzerine sürükleyin. Script çalışacak ve girdi ile aynı dizinde bir `.pdf` dosyası oluşturacaktır.
45 | 
46 | ### 3. `docx_to_udf.bat`
47 | - **Amaç**: DOCX dosyasını UDF formatına dönüştürür.
48 | - **Nasıl Kullanılır**: `.docx` dosyasını `docx_to_udf.bat` scriptinin üzerine sürükleyin. Script çalışacak ve girdi ile aynı dizinde bir `.udf` dosyası oluşturacaktır.
49 | 
50 | ### 4. `scanned_pdf_to_udf.bat`
51 | - **Amaç**: Tarama yapılmış bir PDF dosyasını UDF formatına dönüştürür.
52 | - **Nasıl Kullanılır**: `.pdf` dosyasını `scanned_pdf_to_udf.bat` scriptinin üzerine sürükleyin. Script çalışacak ve girdi ile aynı dizinde bir `.udf` dosyası oluşturacaktır.
53 | 
54 | 
55 | ## UDF Formatı Dokümantasyonu
56 | [Docs.md](./Docs.md)
57 | 


--------------------------------------------------------------------------------
/table_processor.py:
--------------------------------------------------------------------------------
 1 | from docx.oxml.ns import qn
 2 | from paragraph_processor import process_paragraph
 3 | 
 4 | def process_table(table, document, current_offset):
 5 |     table_text = ""
 6 |     rows = []
 7 |     grid_cols = table.findall('.//w:gridCol', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'})
 8 |     column_count = len(grid_cols)
 9 |     
10 |     # Calculate column widths
11 |     total_width = sum(int(col.get(qn('w:w'), '0')) for col in grid_cols)
12 |     column_widths = [int(col.get(qn('w:w'), '0')) for col in grid_cols]
13 |     column_spans = ",".join([str(int((width / total_width) * 300)) for width in column_widths])  # Scale to 300
14 | 
15 |     # Check table borders
16 |     tblBorders = table.find('.//w:tblBorders', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'})
17 |     border_type = "borderCell"  # Default to visible borders
18 |     if tblBorders is not None:
19 |         border_elements = ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']
20 |         all_borders_none = all(
21 |             tblBorders.find(f'.//w:{border}', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) is None or
22 |             tblBorders.find(f'.//w:{border}', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}).get(qn('w:val')) in ['none', 'nil', '0']
23 |             for border in border_elements
24 |         )
25 |         if all_borders_none:
26 |             border_type = "borderNone"
27 |     else:
28 |         # If tblBorders is not defined, assume borderless table
29 |         border_type = "borderNone"
30 | 
31 |     for row_index, row in enumerate(table.findall('.//w:tr', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'})):
32 |         cells = []
33 |         for cell in row.findall('.//w:tc', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}):
34 |             cell_text, cell_elements = process_cell(cell, document, current_offset)
35 |             cells.append(f'<cell>{"".join(cell_elements)}</cell>')
36 |             table_text += cell_text
37 |             current_offset += len(cell_text)
38 | 
39 |         rows.append(f'<row rowName="row{row_index + 1}" rowType="dataRow">{"".join(cells)}</row>')
40 | 
41 |     table_element = f'<table tableName="Sabit" columnCount="{column_count}" columnSpans="{column_spans}" border="{border_type}">{"".join(rows)}</table>'
42 |     return table_text, table_element
43 | 
44 | 
def process_cell(cell, document, current_offset):
    """Convert one table cell into its text and UDF element markup.

    Every ``w:p`` found under the cell is converted with
    ``process_paragraph``. A newline, with its own one-character content
    element, is inserted after each paragraph that is not the last one and
    whose text is not blank. A cell that yields no text at all is
    represented by a single space.

    Returns:
        ``(cell_text, cell_elements)``.
    """
    ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}

    def one_char_content(offset):
        # Element describing a single filler character at the given offset
        return f'<content startOffset="{offset}" length="1" family="Times New Roman" size="10" />'

    paragraphs = cell.findall('.//w:p', namespaces=ns)
    last_index = len(paragraphs) - 1

    pieces = []
    cell_elements = []
    for index, paragraph in enumerate(paragraphs):
        para_text, para_elements = process_paragraph(paragraph, document, current_offset)
        pieces.append(para_text)
        cell_elements.extend(para_elements)
        current_offset += len(para_text)

        # Separate paragraphs with a line break, but never after the last one
        if index != last_index and para_text.strip():
            pieces.append('\n')
            cell_elements.append(one_char_content(current_offset))
            current_offset += 1

    cell_text = "".join(pieces)
    if cell_text:
        return cell_text, cell_elements

    # Empty cell: emit one space so the cell still owns a character
    cell_elements.append(one_char_content(current_offset))
    return " ", cell_elements
69 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | from docx.oxml.ns import qn
 2 | 
 3 | def get_alignment(paragraph):
 4 |     alignment = paragraph.find('.//w:jc', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'})
 5 |     if alignment is not None:
 6 |         val = alignment.get(qn('w:val'))
 7 |         if val == 'center':
 8 |             return '1'
 9 |         elif val == 'right':
10 |             return '2'
11 |         elif val == 'both':
12 |             return '3'
13 |     return '0'  # Default to Left
14 | 
def get_indent_attrs(paragraph):
    """Build the indent attribute string for a paragraph.

    Reads ``w:left``, ``w:right`` and ``w:firstLine`` from the first ``w:ind``
    element under the paragraph and divides each value by 20. Missing left or
    right values are reported as 0.0; ``FirstLineIndent`` is added only when
    ``w:firstLine`` is present.
    """
    ind = paragraph.find('.//w:ind', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'})
    if ind is None:
        left = right = first_line = None
    else:
        left = ind.get(qn('w:left'))
        right = ind.get(qn('w:right'))
        first_line = ind.get(qn('w:firstLine'))

    left_indent = float(left) / 20 if left else 0.0
    right_indent = float(right) / 20 if right else 0.0

    parts = [f'LeftIndent="{left_indent}"', f'RightIndent="{right_indent}"']
    if first_line:
        parts.append(f'FirstLineIndent="{float(first_line) / 20}"')

    return ' '.join(parts)
27 | 
def get_bullet_attrs(paragraph):
    """Build the bullet attribute string for a list paragraph.

    Returns an empty string unless the paragraph has a ``w:numPr`` element
    containing both ``w:ilvl`` and ``w:numId``. Otherwise the list id is the
    ``w:numId`` value, the list level is the ``w:ilvl`` value plus one, and
    the bullet type comes from ``get_bullet_type``.
    """
    ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}

    num_pr = paragraph.find('.//w:numPr', namespaces=ns)
    if num_pr is None:
        return ''

    level_node = num_pr.find('.//w:ilvl', namespaces=ns)
    num_id_node = num_pr.find('.//w:numId', namespaces=ns)
    if level_node is None or num_id_node is None:
        return ''

    bullet_type = get_bullet_type(num_id_node.get(qn("w:val")))
    list_id = num_id_node.get(qn("w:val"))
    list_level = int(level_node.get(qn("w:val"))) + 1
    return f'Bulleted="true" ListId="{list_id}" ListLevel="{list_level}" BulletType="{bullet_type}"'
37 | 
def get_bullet_type(num_id):
    """Return the bullet type name for a numbering id.

    The string ids "1" to "6" map to the six bullet types below, in order.
    Any other value gives ``BULLET_TYPE_ELLIPSE``.
    """
    ordered_types = (
        "BULLET_TYPE_ELLIPSE",
        "BULLET_TYPE_RECTANGLE",
        "BULLET_TYPE_RECTANGLE_D",
        "BULLET_TYPE_ARROW",
        "BULLET_TYPE_DIAMOND",
        "BULLET_TYPE_TRIANGLE",
    )
    # Keys are the string ids "1".."6", matching the position in the tuple
    by_id = {str(position): name for position, name in enumerate(ordered_types, start=1)}
    return by_id.get(num_id, ordered_types[0])  # Default to ELLIPSE
48 | 
def get_font_properties(run):
    """Build the font attribute string for a ``w:r`` run element.

    Args:
        run: The run element to inspect.

    Returns:
        A space-separated string ``family="..." size="..."``, followed by
        ``bold="true"`` and/or ``italic="true"`` when the run enables them.
        The family defaults to Times New Roman and the size to 10 when the
        run does not specify them.
    """
    w_ns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
    ns = {'w': w_ns}
    val_attr = f'{{{w_ns}}}val'  # same key qn('w:val') yields

    # w:rFonts and w:sz keep their values in attributes (w:ascii / w:val) and
    # have no text content, so findtext() could only ever yield the defaults.
    fonts = run.find('.//w:rFonts', namespaces=ns)
    font_family = (fonts.get(f'{{{w_ns}}}ascii') if fonts is not None else None) or "Times New Roman"

    size = run.find('.//w:sz', namespaces=ns)
    half_points = (size.get(val_attr) if size is not None else None) or "20"
    try:
        font_size = str(int(half_points) // 2)  # Convert half-points to points
    except ValueError:
        font_size = "10"  # Unparseable size: fall back to the default

    def is_enabled(tag):
        # A toggle such as <w:b/> is on unless its w:val explicitly says
        # false/0/off.
        node = run.find(f'.//w:{tag}', namespaces=ns)
        return node is not None and node.get(val_attr, 'true') not in ('false', '0', 'off')

    style_attrs = [f'family="{font_family}"', f'size="{font_size}"']
    if is_enabled('b'):
        style_attrs.append('bold="true"')
    if is_enabled('i'):
        style_attrs.append('italic="true"')

    return ' '.join(style_attrs)
61 | 
def get_line_spacing(paragraph):
    """Return the extra line spacing for a paragraph.

    Reads ``w:line`` and ``w:lineRule`` from the first ``w:spacing`` element
    under the paragraph.

    * ``auto`` rule: ``line / 240 - 1``, so a ``w:line`` of 480 gives 1.0.
    * ``exact`` / ``atLeast`` rule: ``line / 20 - 12``.

    Results are never negative. 0.0 is returned when no usable spacing
    information is present.
    """
    w_ns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
    spacing = paragraph.find('.//w:spacing', namespaces={'w': w_ns})
    if spacing is None:
        return 0.0  # Default to single spacing in UDF

    # Namespace-qualified names, the same keys qn('w:line') /
    # qn('w:lineRule') yield.
    line = spacing.get(f'{{{w_ns}}}line')
    if not line:
        return 0.0

    # WordprocessingML treats a missing lineRule as "auto" when w:line is
    # given, so a bare w:line must not be ignored.
    line_rule = spacing.get(f'{{{w_ns}}}lineRule') or 'auto'

    try:
        value = float(line)
    except ValueError:
        return 0.0  # Malformed value: fall back to single spacing

    if line_rule == 'auto':
        # Convert to UDF line spacing (DOCX 2.0 = UDF 1.0)
        return max(0, (value / 240) - 1)
    if line_rule in ('exact', 'atLeast'):
        # Convert twips to points and adjust for UDF
        return max(0, (value / 20) - 12)
    return 0.0
75 | 


--------------------------------------------------------------------------------
/scanned_pdf_to_udf.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import zipfile
 4 | import base64
 5 | import fitz  # PyMuPDF
 6 | import io
 7 | from PIL import Image
 8 | 
 9 | def pdf_to_udf(pdf_file, udf_file):
10 |     udf_template = '''<?xml version="1.0" encoding="UTF-8" ?>
11 | <template format_id="1.8">
12 | <content><![CDATA[{content}]]></content>
13 | <properties><pageFormat mediaSizeName="1" leftMargin="42.51968479156494" rightMargin="28.34645652770996" topMargin="14.17322826385498" bottomMargin="14.17322826385498" paperOrientation="1" headerFOffset="20.0" footerFOffset="20.0" /></properties>
14 | <elements resolver="hvl-default">
15 | {elements}
16 | </elements>
17 | <styles><style name="default" description="Geçerli" family="Dialog" size="12" bold="false" italic="false" foreground="-13421773" FONT_ATTRIBUTE_KEY="javax.swing.plaf.FontUIResource[family=Dialog,name=Dialog,style=plain,size=12]" /><style name="hvl-default" family="Times New Roman" size="12" description="Gövde" /></styles>
18 | </template>'''
19 | 
20 |     try:
21 |         pdf_document = fitz.open(pdf_file)
22 |         content = []
23 |         elements = []
24 |         current_offset = 0
25 | 
26 |         for page_num in range(len(pdf_document)):
27 |             page = pdf_document[page_num]
28 |             
29 |             # Extract text
30 |             text = page.get_text()
31 |             if text:
32 |                 content.append(text)
33 |                 elements.append(f'<paragraph Alignment="0" LeftIndent="0.0" RightIndent="0.0"><content startOffset="{current_offset}" length="{len(text)}" /></paragraph>')
34 |                 current_offset += len(text)
35 |             
36 |             # Extract images
37 |             image_list = page.get_images(full=True)
38 |             for img_index, img in enumerate(image_list):
39 |                 xref = img[0]
40 |                 base_image = pdf_document.extract_image(xref)
41 |                 image_bytes = base_image["image"]
42 |                 
43 |                 # Convert image to base64
44 |                 image = Image.open(io.BytesIO(image_bytes))
45 |                 buffered = io.BytesIO()
46 |                 image.save(buffered, format="PNG")
47 |                 img_str = base64.b64encode(buffered.getvalue()).decode()
48 |                 
49 |                 # Add placeholder for image in content
50 |                 placeholder = '\uFFFC'  # Object Replacement Character
51 |                 content.append(placeholder)
52 |                 
53 |                 # Add image element
54 |                 elements.append(f'<image family="Times New Roman" size="10" imageData="{img_str}" startOffset="{current_offset}" length="1" />')
55 |                 current_offset += 1
56 |             
57 |             # Add a newline between pages
58 |             content.append('\n')
59 |             elements.append(f'<paragraph Alignment="0" LeftIndent="0.0" RightIndent="0.0"><content startOffset="{current_offset}" length="1" /></paragraph>')
60 |             current_offset += 1
61 | 
62 |         udf_content = udf_template.format(
63 |             content=''.join(content),
64 |             elements='\n'.join(elements)
65 |         )
66 | 
67 |         with zipfile.ZipFile(udf_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
68 |             zipf.writestr('content.xml', udf_content)
69 |         print(f"UDF file created successfully: {udf_file}")
70 |     except Exception as e:
71 |         print(f"Error creating UDF file: {e}")
72 | 
def main():
    """Command-line entry point: convert the PDF file named in ``sys.argv[1]``.

    The UDF file is written next to the input, with the extension replaced
    by ``.udf``. The process exits with status 1 when no argument is given,
    the input file does not exist, the extension is not ``.pdf``, or the
    converter reports failure.
    """
    if len(sys.argv) < 2:
        # The usage line must name this script
        print("Usage: python scanned_pdf_to_udf.py input.pdf")
        sys.exit(1)

    input_file = sys.argv[1]

    if not os.path.isfile(input_file):
        print(f"Input file not found: {input_file}")
        sys.exit(1)

    filename, ext = os.path.splitext(input_file)

    if ext.lower() != '.pdf':
        print("Please provide a .pdf file.")
        sys.exit(1)

    # An explicit False from pdf_to_udf() means the conversion failed;
    # propagate it as a non-zero exit status so wrapper scripts do not report
    # success. Any other return value (including None) is treated as success.
    if pdf_to_udf(input_file, filename + '.udf') is False:
        sys.exit(1)


if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | ~$test.docx
162 | .DS_Store
163 | .python-version
164 | hello.py
165 | pdf-test.pdf
166 | pyproject.toml
167 | *.udf
168 | *.lock
169 | *.pdf
170 | *.xml
171 | *.png
172 | *.docx
173 | 


--------------------------------------------------------------------------------
/paragraph_processor.py:
--------------------------------------------------------------------------------
  1 | from docx.oxml.ns import qn
  2 | from image_processor import process_image
  3 | from utils import get_alignment, get_indent_attrs, get_bullet_attrs
  4 | 
  5 | def process_paragraph(paragraph, document, current_offset):
  6 |     EMPTY_PARAGRAPH_PLACEHOLDER = '\u200B'  # Zero-width space
  7 |     TAB_CHARACTER = '\t'  # Tab character
  8 |     
  9 |     para_text = ""
 10 |     para_elements = []
 11 |     
 12 |     # Numaralandırma ve madde işareti özelliklerini al
 13 |     numPr = paragraph.find('.//w:numPr', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'})
 14 |     numbered = False
 15 |     list_id = ""
 16 |     list_level = ""
 17 |     number_type = ""
 18 |     
 19 |     if numPr is not None:
 20 |         ilvl = numPr.find('.//w:ilvl', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'})
 21 |         numId = numPr.find('.//w:numId', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'})
 22 |         if ilvl is not None and numId is not None:
 23 |             numbered = True
 24 |             list_id = numId.get(qn("w:val"))
 25 |             list_level = str(int(ilvl.get(qn("w:val"))) + 1)
 26 |             number_type = get_number_type(list_id)
 27 |     
 28 |     for run in paragraph.findall('.//w:r', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}):
 29 |         # Process images in the run
 30 |         drawing_elements = run.findall('.//w:drawing', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'})
 31 |         if drawing_elements:
 32 |             for drawing in drawing_elements:
 33 |                 image_data, width, height = process_image(drawing, document)
 34 |                 if image_data:
 35 |                     # Insert a placeholder character in content
 36 |                     placeholder = '\uFFFC'  # Object Replacement Character
 37 |                     para_text += placeholder
 38 | 
 39 |                     # Add image element
 40 |                     para_elements.append(
 41 |                         f'<image imageData="{image_data}" '
 42 |                         f'startOffset="{current_offset}" length="1" width="{width}" height="{height}" />'
 43 |                     )
 44 |                     current_offset += 1
 45 |                 else:
 46 |                     print("Failed to process image, skipping...")
 47 | 
 48 | 
 49 | 
 50 |         # Process text and tab characters in the run
 51 |         text = run.findtext('.//w:t', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) or ''
 52 |         if text or run.find('.//w:tab', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) is not None:
 53 |             # Get font properties
 54 |             font_family = run.findtext('.//w:rFonts[@w:ascii]', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) or "Times New Roman"
 55 |             font_size = run.findtext('.//w:sz', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) or "20"
 56 |             font_size = str(int(font_size) // 2)  # Convert half-points to points
 57 | 
 58 |             style_attrs = [f'family="{font_family}"', f'size="{font_size}"']
 59 |             if run.find('.//w:b', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) is not None:
 60 |                 style_attrs.append('bold="true"')
 61 |             if run.find('.//w:i', namespaces={'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}) is not None:
 62 |                 style_attrs.append('italic="true"')
 63 | 
 64 |             style_attr_str = ' '.join(style_attrs)
 65 | 
 66 |             # Process text and tab characters
 67 |             for child in run:
 68 |                 if child.tag.endswith('}t'):  # Text
 69 |                     para_elements.append(f'<content startOffset="{current_offset}" length="{len(child.text)}" {style_attr_str} />')
 70 |                     para_text += child.text
 71 |                     current_offset += len(child.text)
 72 |                 elif child.tag.endswith('}tab'):  # Tab
 73 |                     para_elements.append(f'<tab {style_attr_str} startOffset="{current_offset}" length="1" />')
 74 |                     para_text += TAB_CHARACTER
 75 |                     current_offset += 1
 76 | 
 77 |     # If paragraph is empty, add placeholder
 78 |     if not para_text:
 79 |         para_text = EMPTY_PARAGRAPH_PLACEHOLDER
 80 |         para_elements.append(f'<content startOffset="{current_offset}" length="1" family="Times New Roman" size="10" />')
 81 |         current_offset += 1
 82 | 
 83 |     # Numaralandırma ve madde işareti özelliklerini paragraf elementine ekle
 84 |     paragraph_attrs = f'Alignment="{get_alignment(paragraph)}" {get_indent_attrs(paragraph)}'
 85 |     if numbered:
 86 |         if number_type.startswith("NUMBER_TYPE_"):
 87 |             paragraph_attrs += f' Numbered="true" ListId="{list_id}" ListLevel="{list_level}" NumberType="{number_type}"'
 88 |         else:
 89 |             paragraph_attrs += f' Bulleted="true" ListId="{list_id}" ListLevel="{list_level}" BulletType="{number_type}"'
 90 | 
 91 |     paragraph_element = f'<paragraph {paragraph_attrs}>{"".join(para_elements)}</paragraph>'
 92 |     return para_text, paragraph_element
 93 | 
 94 | def get_number_type(list_id):
 95 |     # Bu fonksiyonu, belgenizin numaralandırma tanımlarına göre özelleştirmeniz gerekebilir
 96 |     number_types = {
 97 |         "1": "NUMBER_TYPE_CHAR_SMALL_DOT",
 98 |         "2": "BULLET_TYPE_ARROW",
 99 |         "3": "NUMBER_TYPE_ROMAN_BIG_DOT",
100 |         "4": "NUMBER_TYPE_CHAR_BIG_DOT",
101 |         "5": "NUMBER_TYPE_CHAR_SMALL_PARANTHESE",
102 |         "6": "NUMBER_TYPE_NUMBER_TRE",
103 |         "7": "NUMBER_TYPE_ROMAN_SMALL_DOT",
104 |         "8": "BULLET_TYPE_ELLIPSE",
105 |         "9": "BULLET_TYPE_RECTANGLE",
106 |         "10": "BULLET_TYPE_RECTANGLE_D",
107 |         "11": "NUMBER_TYPE_NUMBER_PARANTHESE",
108 |         "12": "BULLET_TYPE_DIAMOND",
109 |         "13": "BULLET_TYPE_TRIANGLE",
110 |         # Diğer numaralandırma ve madde işareti türlerini buraya ekleyin
111 |     }
112 |     return number_types.get(list_id, "NUMBER_TYPE_NUMBER_TRE")  # Varsayılan olarak NUMBER_TYPE_NUMBER_TRE kullan
113 | 


--------------------------------------------------------------------------------
/udf_to_md.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import xml.etree.ElementTree as ET
  4 | import zipfile
  5 | import base64
  6 | import io
  7 | 
  8 | def is_zip_file(file_path):
  9 |     """Check if the file is a valid ZIP file"""
 10 |     try:
 11 |         with zipfile.ZipFile(file_path, 'r') as z:
 12 |             return True
 13 |     except zipfile.BadZipFile:
 14 |         return False
 15 | 
 16 | def udf_to_markdown(udf_file):
 17 |     root = None
 18 |     
 19 |     # Check if the file is a ZIP file
 20 |     if is_zip_file(udf_file):
 21 |         # Process as a ZIP file
 22 |         with zipfile.ZipFile(udf_file, 'r') as z:
 23 |             if 'content.xml' in z.namelist():
 24 |                 with z.open('content.xml') as content_file:
 25 |                     tree = ET.parse(content_file)
 26 |                     root = tree.getroot()
 27 |             else:
 28 |                 print("The 'content.xml' file could not be found in the UDF file.")
 29 |                 exit()
 30 |     else:
 31 |         # Process as an XML file directly
 32 |         try:
 33 |             tree = ET.parse(udf_file)
 34 |             root = tree.getroot()
 35 |         except ET.ParseError:
 36 |             print(f"The file {udf_file} is neither a valid ZIP nor a valid XML file.")
 37 |             exit()
 38 | 
 39 |     if root is None:
 40 |         print("Failed to parse the file.")
 41 |         exit()
 42 | 
 43 |     # Initialize the markdown output
 44 |     markdown_output = ""
 45 | 
 46 |     # Create a dictionary for style definitions
 47 |     styles = {}
 48 | 
 49 |     # Retrieve style information
 50 |     styles_element = root.find('styles')
 51 |     if styles_element is not None:
 52 |         for style in styles_element.findall('style'):
 53 |             style_name = style.get('name')
 54 |             style_attributes = {
 55 |                 'family': style.get('family'),
 56 |                 'size': int(style.get('size', 12)),
 57 |                 'bold': style.get('bold', 'false') == 'true',
 58 |                 'italic': style.get('italic', 'false') == 'true',
 59 |                 'foreground': int(style.get('foreground', '-13421773')),
 60 |             }
 61 |             styles[style_name] = style_attributes
 62 | 
 63 |     # Retrieve content text
 64 |     content_element = root.find('content')
 65 |     if content_element is not None:
 66 |         content_text = content_element.text
 67 |         if content_text and content_text.startswith('<![CDATA[') and content_text.endswith(']]>'):
 68 |             content_text = content_text[9:-3]
 69 |     else:
 70 |         print("'content' could not be found in the XML.")
 71 |         exit()
 72 | 
 73 |     # Process the 'elements' section
 74 |     elements_element = root.find('elements')
 75 |     
 76 |     if elements_element is not None:
 77 |         for elem in elements_element:
 78 |             if elem.tag == 'paragraph':
 79 |                 # Handle the paragraph
 80 |                 paragraph_text = ""
 81 |                 
 82 |                 # Set paragraph alignment (we'll add this as HTML in markdown since markdown doesn't have native alignment)
 83 |                 alignment = elem.get('Alignment', '0')
 84 |                 alignment_tag = ""
 85 |                 if alignment == '1':
 86 |                     alignment_tag = "<div align='center'>"
 87 |                 elif alignment == '2':
 88 |                     alignment_tag = "<div align='right'>"
 89 |                 elif alignment == '3':
 90 |                     alignment_tag = "<div align='justify'>"
 91 |                 
 92 |                 # Process the paragraph content
 93 |                 for child in elem:
 94 |                     if child.tag == 'content':
 95 |                         # Get the text
 96 |                         start_offset = int(child.get('startOffset', '0'))
 97 |                         length = int(child.get('length', '0'))
 98 |                         text = content_text[start_offset:start_offset+length]
 99 |                         
100 |                         # Apply formatting
101 |                         if child.get('bold', 'false') == 'true' and child.get('italic', 'false') == 'true':
102 |                             text = f"***{text}***"
103 |                         elif child.get('bold', 'false') == 'true':
104 |                             text = f"**{text}**"
105 |                         elif child.get('italic', 'false') == 'true':
106 |                             text = f"*{text}*"
107 |                             
108 |                         paragraph_text += text
109 |                         
110 |                     elif child.tag == 'space':
111 |                         paragraph_text += " "
112 |                     elif child.tag == 'image':
113 |                         # For images, we'll just add a placeholder in markdown
114 |                         paragraph_text += "[Image]"
115 |                 
116 |                 # Apply alignment if needed
117 |                 if alignment_tag:
118 |                     paragraph_text = f"{alignment_tag}{paragraph_text}</div>"
119 |                 
120 |                 markdown_output += paragraph_text + "\n\n"
121 |                 
122 |             elif elem.tag == 'table':
123 |                 # Handle tables
124 |                 column_count = int(elem.get('columnCount', '1'))
125 |                 rows = elem.findall('row')
126 |                 
127 |                 # Create table header row with correct number of columns
128 |                 markdown_output += "| " + " | ".join(["Column"] * column_count) + " |\n"
129 |                 markdown_output += "| " + " | ".join(["---"] * column_count) + " |\n"
130 |                 
131 |                 for row in rows:
132 |                     cells = row.findall('cell')
133 |                     row_text = "| "
134 |                     
135 |                     for cell in cells:
136 |                         cell_text = ""
137 |                         paragraphs = cell.findall('paragraph')
138 |                         
139 |                         for para in paragraphs:
140 |                             para_text = ""
141 |                             
142 |                             for child in para:
143 |                                 if child.tag == 'content':
144 |                                     # Get the text
145 |                                     start_offset = int(child.get('startOffset', '0'))
146 |                                     length = int(child.get('length', '0'))
147 |                                     text = content_text[start_offset:start_offset+length]
148 |                                     
149 |                                     # Apply formatting
150 |                                     if child.get('bold', 'false') == 'true' and child.get('italic', 'false') == 'true':
151 |                                         text = f"***{text}***"
152 |                                     elif child.get('bold', 'false') == 'true':
153 |                                         text = f"**{text}**"
154 |                                     elif child.get('italic', 'false') == 'true':
155 |                                         text = f"*{text}*"
156 |                                         
157 |                                     para_text += text
158 |                                     
159 |                                 elif child.tag == 'space':
160 |                                     para_text += " "
161 |                                 elif child.tag == 'image':
162 |                                     para_text += "[Image]"
163 |                             
164 |                             cell_text += para_text + " "
165 |                         
166 |                         # Remove pipe characters from cell content as they would break the markdown table
167 |                         cell_text = cell_text.replace("|", "\\|").strip()
168 |                         row_text += cell_text + " | "
169 |                     
170 |                     markdown_output += row_text + "\n"
171 |                 
172 |                 markdown_output += "\n"
173 |     else:
174 |         print("'elements' could not be found in the XML.")
175 | 
176 |     return markdown_output
177 | 
def main():
    """Command-line entry point: convert one UDF file to Markdown.

    Reads the input path from ``sys.argv[1]``, prints the generated Markdown
    and writes it next to the input file with a ``.md`` extension.

    Raises:
        SystemExit: With status 1 when no path is given or the path is not
            an existing file.
    """
    if len(sys.argv) < 2:
        print("Usage: python udf_to_md.py input.udf")
        sys.exit(1)

    udf_file = sys.argv[1]

    if not os.path.isfile(udf_file):
        print(f"Input file not found: {udf_file}")
        sys.exit(1)

    # Convert UDF to markdown and print to console
    markdown_content = udf_to_markdown(udf_file)
    print(markdown_content)

    # The result is always saved alongside the input as <name>.md
    markdown_file = os.path.splitext(udf_file)[0] + '.md'
    with open(markdown_file, 'w', encoding='utf-8') as md_file:
        md_file.write(markdown_content)
    print(f"Markdown file created: {markdown_file}")


if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/Docs.md:
--------------------------------------------------------------------------------
  1 | # UYAP UDF Dosya Formatı
  2 | 
  3 | ## İçindekiler
  4 | 
  5 | 1.  [Genel Bakış](#genel-bakış)
  6 | 2.  [UDF Dosya Yapısı](#udf-dosya-yapısı)
  7 | 3.  [XML Yapısı](#xml-yapısı)
  8 | 4.  [Kök Eleman](#kök-eleman)
  9 | 5.  [Ana Bölümler](#ana-bölümler)
 10 |     * [İçerik Bölümü (`<content>`)](#içerik-bölümü-content)
 11 |     * [Özellikler Bölümü (`<properties>`)](#özellikler-bölümü-properties)
 12 |     * [Elemanlar Bölümü (`<elements>`)](#elemanlar-bölümü-elements)
 13 |     * [Stiller Bölümü (`<styles>`)](#stiller-bölümü-styles)
 14 |     * [Veri Bölümü (`<data>`) (Varsayımsal)](#veri-bölümü-data-varsayımsal)
 15 | 6.  [Detaylı Eleman Açıklamaları ve Özellik Örnekleri](#detaylı-eleman-açıklamaları-ve-özellik-örnekleri)
 16 |     * [Üstbilgi (`<header>`)](#üstbilgi-header)
 17 |     * [Altbilgi (`<footer>`)](#altbilgi-footer)
 18 |     * [Paragraf (`<paragraph>`)](#paragraf-paragraph)
 19 |     * [İçerik (`<content>` elemanı)](#içerik-content-elemanı)
 20 |     * [Resim (`<image>`)](#resim-image)
 21 |     * [Tablo (`<table>`)](#tablo-table)
 22 |     * [Satır (`<row>`)](#satır-row)
 23 |     * [Hücre (`<cell>`)](#hücre-cell)
 24 |     * [Sekme (`<tab>`)](#sekme-tab)
 25 |     * [Boşluk (`<space>`)](#boşluk-space)
 26 |     * [Sayfa Sonu (`<page-break>`)](#sayfa-sonu-page-break)
 27 |     * [Alan (`<field>`) (Varsayımsal)](#alan-field-varsayımsal)
 28 | 
 29 | ## Genel Bakış
 30 | 
 31 | Bu belge, belge şablonlama ve biçimlendirme için kullanılan UYAP UDF (Ulusal Yargı Ağı Projesi Doküman Formatı) ve dahili XML formatının yapısını ve elemanlarını açıklar. Bu format, çeşitli biçimlendirme seçenekleri, tablolar, gömülü öğeler, üstbilgiler, altbilgiler ve listeler içeren zengin metin belgelerini temsil etmek için tasarlanmıştır.
 32 | 
 33 | ## UDF Dosya Yapısı
 34 | 
 35 | UDF formatı, esasen belirli bir iç yapıya sahip bir ZIP arşividir:
 36 | 
 37 | 1.  UDF (ZIP) içindeki ana dosya `content.xml` olarak adlandırılır.
 38 | 2.  Bu `content.xml` dosyası, XML formatında gerçek belge içeriğini ve biçimlendirme bilgilerini içerir.
 39 | 3.  ZIP arşivinde diğer kaynaklar da bulunabilir. Örneğin, `content.xml` içinde base64 olarak kodlanmamış büyük resim dosyaları ayrı olarak saklanabilir ve `content.xml`'den referans verilebilir (ancak `<image imageData="...">` elemanı base64 gömülü resimleri destekler).
 40 | 
 41 | Bir UDF dosyasının içeriğini düzenlemek veya görüntülemek için:
 42 | 
 43 | 1.  Dosya uzantısını `.udf`'den `.zip`'e değiştirin
 44 | 2.  ZIP dosyasının içeriğini çıkarın
 45 | 3.  `content.xml` dosyasını açın ve düzenleyin
 46 | 4.  Düzenlenmiş dosyaları tekrar ZIP arşivine paketleyin
 47 | 5.  ZIP dosyasını tekrar `.udf` olarak yeniden adlandırın
 48 | 
 49 | ## XML Yapısı
 50 | 
 51 | `content.xml` dosyası, aşağıda ayrıntılı olarak açıklayacağımız belirli bir XML yapısını takip eder.
 52 | 
 53 | ## Kök Eleman
 54 | 
 55 | XML belgesinin kök elemanı, aşağıdaki özelliklere sahip `<template>`'dir:
 56 | 
 57 |   - `format_id`: Formatın sürümü
 58 |       * Örnek: `format_id="1.8"`
 59 |   - `webID`: Belge için web tabanlı bir kimlik (isteğe bağlı).
 60 |   - `institutionID`: Kurum kimliği (isteğe bağlı).
 61 |   - `isTemplate`: Belgenin bir şablon olup olmadığını belirtir (`true`/`false`).
 62 |   - `description`: Dokümanın genel bir açıklaması.
 63 | 
 64 | ## Ana Bölümler
 65 | 
 66 | `<template>` elemanı genellikle dört ana bölüm içerir. UYAP sisteminin şablonlama yeteneklerine bağlı olarak bir `<data>` bölümü de bulunabilir:
 67 | 
 68 | 1.  `<content>`: Belgenin ham metin içeriği
 69 | 2.  `<properties>`: Belge genelindeki özellikler
 70 | 3.  `<elements>`: Belgenin yapısı ve biçimlendirmesi
 71 | 4.  `<styles>`: Belgede kullanılan metin stilleri
 72 | 5.  `<data>` (Varsayımsal): Şablon belgelerde, alanları doldurmak için kullanılacak verileri içerebilir. Yapısı UYAP'a özgü olabilir.
 73 | 
 74 | ### İçerik Bölümü (`<content>`)
 75 | 
 76 | `<content>` bölümü bir CDATA bloğu içine sarılmıştır ve belgenin ham metnini içerir. Bu, üstbilgiler, altbilgiler ve ana gövde metni dahil olmak üzere tüm metinsel içeriği içerir.
 77 | 
 78 | Örnek:
 79 | 
 80 | ```xml
 81 | <content><![CDATA[
 82 |   Bu, belgenin ham içeriğidir.
 83 |   Özel karakterler dahil her türlü metni içerebilir.
 84 | ]]></content>
 85 | ```
 86 | 
 87 | **Önemli Not:** İçerik bölümü, tüm metinsel verileri içeren tek bir havuz olarak çalışır. `<elements>` bölümündeki `<content>` elemanları, `startOffset` ve `length` özellikleri ile bu içerik havuzundaki belirli metin parçalarını referans alır.
 88 | 
 89 | ### Özellikler Bölümü (`<properties>`)
 90 | 
 91 | `<properties>` elemanı, sayfa düzenini tanımlayan özelliklerle bir `<pageFormat>` elemanı içerir:
 92 | 
 93 |   - **`<pageFormat>` Elemanı:**
 94 | 
 95 |       * `mediaSizeName`: Sayfa boyutunu tanımlar.
 96 |           * Değerler: Standart kağıt boyutlarını temsil eden tamsayı veya string (örn: "A4", "LETTER").
 97 |           * Örnek: `mediaSizeName="A4"`
 98 |       * `leftMargin`, `rightMargin`, `topMargin`, `bottomMargin`: Sayfa kenar boşlukları (genellikle punto veya UYAP'a özgü birim cinsinden).
 99 |           * Değerler: Ondalık sayılar.
100 |           * Örnek: `leftMargin="70.86"` (yaklaşık 2.5 cm)
101 |       * `paperOrientation`: Sayfa yönü.
102 |           * Değerler: Dikey için "portrait" veya "1", yatay için "landscape" veya "2".
103 |           * Örnek: `paperOrientation="portrait"`
104 |       * `headerFOffset`, `footerFOffset`: Üstbilgi ve altbilginin sayfa kenarından uzaklığı (offset).
105 |           * Değerler: Ondalık sayılar.
106 |           * Örnek: `headerFOffset="30.0"`
107 |       * `pageBorderType`: Sayfa kenarlığının türü (örn: "single", "double", "none").
108 |       * `pageBorderColor`: Sayfa kenarlığının rengi.
109 |       * `pageBorderArt`: Sanatsal sayfa kenarlığı (eğer destekleniyorsa).
110 |       * `pageBorderDisplayHorizontal`, `pageBorderDisplayVertical`, `pageBorderDisplayOnFirstPage`: Sayfa kenarlığının gösterim seçenekleri.
111 |       * `pageBorderDistanceFrom`: Kenarlığın metinden veya sayfa kenarından uzaklığı.
112 |       * `pageBorderTop`, `pageBorderBottom`, `pageBorderLeft`, `pageBorderRight`: Her bir kenar için kenarlık kalınlığı veya stili.
113 |       * `pageColumns`: Sayfa üzerindeki sütun sayısı.
114 |       * `pageColumnSpacing`: Sütunlar arası boşluk.
115 | 
116 |   - **`<bgImage>` Elemanı (Sayfa Arka Plan Resmi):**
117 | 
118 |       * `bgImageSource`: Görüntünün kaynak konumu (eğer ZIP içinde ayrı bir dosyaysa).
119 |           * Örnek: `bgImageSource="/resources/images/background.jpg"`
120 |       * `bgImageData`: Base64 kodlanmış görüntü verisi.
121 |           * Örnek: `bgImageData="iVBORw0KGgoAAAANSUhEUgAA..."`
122 |       * `bgImageBottomMargin`, `bgImageUpMargin`, `bgImageRightMargin`, `bgImageLeftMargin`: Arka plan görüntüsünün kenar boşlukları.
123 |       * `bgImageAlign`: Arka plan resminin hizalanması (örn: "center", "tile").
124 |       * `bgImageRepeat`: Arka plan resminin tekrarlanma şekli (örn: "repeat", "no-repeat").
125 |       * `bgImageWatermark`: Resmin filigran olarak kullanılıp kullanılmayacağı (`true`/`false`).
126 |       * `bgImageOpacity`: Resmin opaklığı (0.0 - 1.0).
127 | 
128 |   - **`<pageImage>` Elemanı (Özel Sayfa Görüntüleri/Filigranlar - UYAP `ac` sabitleriyle ilişkili):**
129 | 
130 |       * `pageImageClassName`: Kullanılacak özel `IPageImage` Java sınıfının adı (UYAP'a özgü).
131 |       * `pageImageGradientData`: Gradyan bir sayfa resmi için veri (UYAP'a özgü).
132 | 
133 | Örnek:
134 | 
135 | ```xml
136 | <properties>
137 |   <pageFormat mediaSizeName="A4" leftMargin="70.86" rightMargin="70.86" topMargin="56.69" bottomMargin="56.69" paperOrientation="portrait" headerFOffset="30.0" footerFOffset="30.0" />
138 |   <bgImage bgImageData="iVBORw0KGgoAAAANSUhEUgAA..." bgImageAlign="center" bgImageRepeat="no-repeat" />
139 | </properties>
140 | ```
141 | 
142 | ### Elemanlar Bölümü (`<elements>`)
143 | 
144 | `<elements>` bölümü, belgenin yapısını ve biçimlendirmesini tanımlar. Aşağıdaki elemanları içerebilir:
145 | 
146 | 1.  `<header>` (Üstbilgi)
147 | 2.  `<footer>` (Altbilgi)
148 | 3.  `<paragraph>` (Paragraf)
149 | 4.  `<content>` (Biçimli metin parçası - bu `<elements>` içindeki `<content>` elemanıdır, kök `<content>` CDATA bloğu değildir)
150 | 5.  `<table>` (Tablo)
151 | 6.  `<image>` (Resim)
152 | 7.  `<tab>` (Sekme karakteri)
153 | 8.  `<space>` (Boşluk karakteri)
154 | 9.  `<page-break>` (Sayfa Sonu)
155 | 10. `<field>` (Şablon Alanı - varsayımsal, UYAP'a özgü olabilir)
156 | 
157 | `<elements>` elemanı bir `resolver` özelliğine sahip olabilir, bu özellik belgenin hangi stil çözümleyiciyi (varsayılan stil setini) kullanacağını belirtir.
158 | Örnek: `<elements resolver="hvl-default">`
159 | 
160 | ### Stiller Bölümü (`<styles>`)
161 | 
162 | `<styles>` bölümü, belgede kullanılan metin stillerini tanımlar:
163 | 
164 |   - **`<style>` Elemanı:**
165 |       * `name`: Stilin benzersiz adı (örn: "hvl-default", "Baslik1").
166 |       * `description`: Stilin açıklaması.
167 |       * `family`: Yazı tipi ailesi (örn: "Times New Roman", "Arial").
168 |       * `size`: Yazı tipi boyutu (genellikle punto cinsinden).
169 |       * `bold`, `italic`, `underline`, `strikethrough`: Metin stili (`true`/`false`).
170 |       * `foreground`: Metin rengi (RGB formatında, genellikle işaretli bir tam sayı olarak).
171 |       * `background`: Metin arka plan rengi (RGB formatında).
172 |       * `subscript`, `superscript`: Alt simge, üst simge (`true`/`false`).
173 |       * `parent`: Bu stilin miras aldığı başka bir stilin adı (hiyerarşik stil yönetimi için).
174 |           * Örnek: `<style name="MyCustomStyle" parent="hvl-default" ... />`
175 |       * `FONT_ATTRIBUTE_KEY`: Bu, bir UYAP Java Swing ayrıştırıcısının içsel bir detayı olabilir ve stilin Swing `Font` nesnesiyle nasıl eşleştiğini belirtebilir; genel UDF formatının bir parçası olmayabilir.
176 | 
177 | Örnek:
178 | 
179 | ```xml
180 | <styles>
181 |   <style name="default" description="Varsayılan" family="Dialog" size="12" bold="false" italic="false" foreground="-13421773" />
182 |   <style name="hvl-default" parent="default" family="Times New Roman" size="12" description="Gövde Metni" />
183 |   <style name="Baslik1" parent="hvl-default" size="16" bold="true" foreground="-16777216" />
184 | </styles>
185 | ```
186 | 
187 | ### Veri Bölümü (`<data>`) (Varsayımsal)
188 | 
189 | Eğer UDF dosyası bir şablon olarak kullanılıyorsa, `<elements>` bölümündeki `<field>` elemanlarını doldurmak için bir `<data>` bölümü bulunabilir. Bu bölümün yapısı genellikle UYAP sistemine özgüdür ve XML veya başka bir formatta olabilir.
190 | 
191 | Örnek (tamamen varsayımsal):
192 | 
193 | ```xml
194 | <data>
195 |   <record>
196 |     <adi>Ahmet</adi>
197 |     <soyadi>Yılmaz</soyadi>
198 |     <davaNo>2023/123</davaNo>
199 |   </record>
200 | </data>
201 | ```
202 | 
203 | ## Detaylı Eleman Açıklamaları ve Özellik Örnekleri
204 | 
205 | ### Üstbilgi (`<header>`)
206 | 
207 | `<header>` elemanı ile temsil edilir, üstbilgi içeriği için paragraflar içerir.
208 | 
209 | Özellikler:
210 | 
211 |   - `background`: Üstbilgi arka plan rengi (RGB formatında).
212 |       * Örnek: `background="-8323073"` (açık mavi)
213 |   - `foreground`: Üstbilgi metin rengi (RGB formatında).
214 |       * Örnek: `foreground="-16776961"` (mavi)
215 | 
216 | Örnek:
217 | 
218 | ```xml
219 | <header background="-8323073" foreground="-16776961">
220 |   <paragraph Alignment="1"> <content family="Times New Roman" size="10" startOffset="0" length="25" /> </paragraph>
221 | </header>
222 | ```
223 | 
224 | ### Altbilgi (`<footer>`)
225 | 
226 | `<footer>` elemanı ile temsil edilir ve aşağıdaki özelliklere sahiptir:
227 | 
228 |   - `background`: Altbilgi arka plan rengi (RGB formatında).
229 |   - `foreground`: Altbilgi metin rengi (RGB formatında).
230 |   - `pageNumber-spec`: Sayfa numarası formatı ve konumu için özel bir belirteç (UYAP'a özgü olabilir).
231 |       * Örnek: `pageNumber-spec="BSP32_40"`
232 |   - `pageNumber-color`: Sayfa numarası rengi (RGB formatında).
233 |   - `pageNumber-fontFace`: Sayfa numarası için yazı tipi.
234 |   - `pageNumber-fontSize`: Sayfa numarası için yazı tipi boyutu.
235 |   - `pageNumber-foreStr`: Sayfa numarasından önce gelen metin (örn: "Sayfa ").
236 |   - `pageNumber-afterStr`: Sayfa numarasından sonra gelen metin (örn: " / ToplamSayfa").
237 |   - `pageNumber-pageStartNumStr`: Başlangıç sayfa numarası.
238 | 
239 | Örnek:
240 | 
241 | ```xml
242 | <footer pageNumber-spec="PageNumCenter" pageNumber-color="-16777216" pageNumber-fontFace="Arial" pageNumber-fontSize="10" pageNumber-foreStr="Sayfa: ">
243 |   <paragraph Alignment="2"> <content family="Arial" size="9" startOffset="26" length="15" /> </paragraph>
244 | </footer>
245 | ```
246 | 
247 | ### Paragraf (`<paragraph>`)
248 | 
249 | `<paragraph>` elemanı ile temsil edilir ve aşağıdaki özelliklere sahiptir:
250 | 
251 |   - `Alignment`: Metin hizalama.
252 |       * Değerler: Sola için "0", ortaya için "1", sağa için "2", iki yana yasla için "3".
253 |       * Örnek: `Alignment="3"` (iki yana yasla)
254 |   - `LeftIndent`, `RightIndent`: Paragrafın sol ve sağ girintileri (punto cinsinden).
255 |   - `FirstLineIndent`: İlk satır girintisi (punto cinsinden).
256 |   - `SpaceBefore` (veya `SpaceAbove`), `SpaceAfter` (veya `SpaceBelow`): Paragraftan önceki ve sonraki boşluk (punto cinsinden).
257 |   - `LineSpacing`: Satır aralığı (örn: 1.0 tek, 1.5, 2.0 çift).
258 |   - `TabSet`: Sekme durak pozisyonları, hizalamaları ve öncü karakterleri (örn: "36.0:0:0,72.0:1:1" - 36pt sola hizalı, 72pt ortaya hizalı nokta öncülü).
259 |   - `Bulleted`: Madde işaretli liste öğesi ise "true".
260 |   - `BulletType`: Madde işareti türü (örn: "BULLET\_TYPE\_ELLIPSE", "BULLET\_TYPE\_SQUARE", veya UYAP'a özgü bir sabit).
261 |   - `Numbered`: Numaralandırılmış liste öğesi ise "true".
262 |   - `NumberType`: Numaralandırma türü (örn: "NUMBER\_TYPE\_NUMBER\_DOT", "NUMBER\_TYPE\_ROMAN\_UPPER", UYAP'a özgü).
263 |   - `ListLevel`: Liste öğesinin girinti seviyesi (0'dan başlar).
264 |   - `ListId`: Aynı listeye ait öğeleri gruplamak için bir tanımlayıcı.
265 |   - `StartNumber`: Numaralandırılmış listeler için başlangıç numarası.
266 |   - `ListRestart`: Bu seviyedeki listenin numarasının yeniden başlatılıp başlatılmayacağı.
267 |   - `ListBulletFont`, `ListBulletColor`, `ListBulletSize`: Madde imi/numara için özel yazı tipi, renk ve boyut.
268 |   - `ListStyleName`: Önceden tanımlanmış bir liste stiline referans.
269 |   - `KeepWithNext`: Paragrafın bir sonraki paragrafla aynı sayfada tutulup tutulmayacağı (`true`/`false`).
270 |   - `HangingIndent`: Asılı girinti miktarı (punto cinsinden).
271 |   - `ParagraphGroupName`, `ParagraphGroupRepeatable`: Şablonlama için paragraf gruplama özellikleri (UYAP'a özgü).
272 |   - `family`, `size`, `bold`, `italic`, vb.: Paragraf içindeki tüm metinler için varsayılan stil özellikleri (içerideki `<content>` elemanları bunları geçersiz kılabilir).
273 | 
274 | Örnek:
275 | 
276 | ```xml
277 | <paragraph Alignment="0" LeftIndent="36.0" LineSpacing="1.5" SpaceBefore="6.0" SpaceAfter="6.0" Bulleted="true" BulletType="BULLET_TYPE_FILLED_CIRCLE" ListLevel="0" ListId="1">
278 |   <content startOffset="77" length="35" style="MyListStyle" /> </paragraph>
279 | ```
280 | 
281 | ### İçerik (`<content>` elemanı)
282 | 
283 | `<elements>` içindeki `<content>` elemanı, belirli biçimlendirmeye sahip metin parçalarını temsil eder:
284 | 
285 |   - `startOffset`, `length`: Ana `<content>` CDATA bloğundaki metnin başlangıç konumu ve uzunluğu.
286 |   - `family`, `size`, `bold`, `italic`, `underline`, `strikethrough`, `subscript`, `superscript`: Metin biçimlendirme özellikleri.
287 |   - `foreground`, `background`: Metin ve arka plan rengi (RGB formatında).
288 |   - `resolver`: Kullanılacak stil çözümleyiciyi belirtir (varsa).
289 |   - `style`: `<styles>` bölümünde tanımlanmış bir stile referans.
290 |       * Örnek: `style="VurguluMetin"`
291 |   - **Alan Özellikleri (Eğer metin bir şablon alanıysa - UYAP `V` sabitleriyle ilişkili):**
292 |       * `fieldName`: Alanın adı.
293 |       * `fieldType`: Alanın türü (örn: "text", "date", "number", UYAP'a özgü).
294 |       * `fieldVisible`: Alanın görünür olup olmadığı (`true`/`false`).
295 |       * `fieldEditable`: Alanın düzenlenebilir olup olmadığı (`true`/`false`).
296 |       * `fieldGroup`: Alanın ait olduğu grup.
297 |       * `fieldDescription`: Alan için açıklama.
298 |   - **Barkod Özellikleri (Eğer metin bir barkod ise - UYAP `V` sabitleriyle ilişkili):**
299 |       * `barcodeData`: Barkodun içeriği olan veri.
300 |       * `barcodeType`: Barkod türü (örn: "Code128", "QRCode").
301 |   - `backgroundImageData`: Bu metin parçası için özel bir arka plan resmi (base64).
302 | 
303 | Örnek:
304 | 
305 | ```xml
306 | <paragraph>
307 |   <content startOffset="100" length="10" style="NormalMetin" />
  <content startOffset="110" length="15" style="VurguluMetin" bold="true" foreground="-65536" />
  <content startOffset="125" length="20" fieldName="MusteriAdi" fieldType="text" />
</paragraph>
309 | ```
310 | 
311 | ### Resim (`<image>`)
312 | 
313 | Resimler `<image>` elemanı ile temsil edilir:
314 | 
315 |   - `imageData`: Base64 ile kodlanmış resim verisi.
316 |   - `width`, `height`: Resmin görüntülenme genişliği ve yüksekliği (punto veya piksel).
317 |   - `alignment`: Resmin hizalanması (paragraf içinde).
318 |   - `description`: Resim için alternatif metin veya açıklama.
319 |   - `family`, `size`: Eğer resim yüklenemezse gösterilecek yer tutucu metnin stili.
320 | 
321 | Örnek:
322 | 
323 | ```xml
<paragraph Alignment="1">
  <image imageData="iVBORw0KGgoAAAANSUhEUgAA..." width="200" height="150" description="Şirket Logosu" />
</paragraph>
326 | ```
327 | 
328 | ### Tablo (`<table>`)
329 | 
330 | Tablolar `<table>` elemanı ile temsil edilir:
331 | 
332 |   - `tableName`: Tablonun adı (isteğe bağlı).
333 |   - `columnCount`: Tablodaki sütun sayısı.
334 |   - `columnSpans`: Her bir sütunun genişliğini tanımlayan virgülle ayrılmış değerler listesi (punto veya yüzde).
335 |       * Örnek: `columnSpans="150,200,100"`
336 |   - `width`: Tablonun toplam genişliği (isteğe bağlı, `columnSpans` genellikle yeterlidir).
337 |   - `widthType`: Tablo genişliğinin türü (örn: "fixed", "percentage").
338 |   - `border`: Tüm tablo için varsayılan kenarlık stili (örn: "borderCell", "borderOuter", "none"). Daha spesifik kenarlıklar hücre bazında tanımlanabilir.
339 |   - `borderType`: Daha detaylı kenarlık tipi (UYAP'a özgü olabilir).
340 |   - `borderColor`, `borderWidth`: Varsayılan kenarlık rengi ve kalınlığı.
341 |   - `cellSpacing`, `cellPadding`: Hücreler arası boşluk ve hücre içi dolgu.
342 |   - `alignment`: Tablonun sayfa üzerindeki hizalanması (örn: "left", "center", "right").
343 | 
344 | Örnek:
345 | 
346 | ```xml
347 | <table tableName="MusteriListesi" columnCount="3" columnSpans="50,*,100" border="borderCell" alignment="center">
348 |   </table>
349 | ```
350 | 
351 | (Not: `*` sütun genişliği için "kalan alanı kullan" anlamına gelebilir, UDF formatına bağlıdır)
352 | 
353 | ### Satır (`<row>`)
354 | 
355 | `<table>` içindeki satırlar `<row>` elemanı ile temsil edilir:
356 | 
357 |   - `rowName`: Satırın adı (isteğe bağlı).
358 |   - `rowType`: Satırın türü (örn: "headerRow", "dataRow", "footerRow"). Başlık satırları sayfa sonlarında tekrarlanabilir.
359 |   - `height`: Satırın sabit yüksekliği (punto).
360 |   - `height_min`, `height_max`: Minimum ve maksimum satır yüksekliği.
361 |   - `cantSplit`: Satırın sayfa sonlarında bölünemeyeceğini belirtir (`true`/`false`).
362 | 
363 | Örnek:
364 | 
365 | ```xml
366 | <row rowType="headerRow" height="30" cantSplit="true">
367 |   </row>
368 | ```
369 | 
370 | ### Hücre (`<cell>`)
371 | 
372 | `<row>` içindeki hücreler `<cell>` elemanı ile temsil edilir:
373 | 
374 |   - `width`: Hücrenin genişliği (eğer `<table>`deki `columnSpans`'ı geçersiz kılıyorsa).
375 |   - `height`: Hücrenin yüksekliği (eğer `<row>`daki `height`'ı geçersiz kılıyorsa).
376 |   - `bgColor` (veya `cellColor`): Hücrenin arka plan rengi (RGB formatında).
377 |       * Örnek: `bgColor="-256"` (sarı)
378 |   - `vAlign`: İçeriğin dikey hizalanması ("top", "middle", "bottom").
379 |       * Örnek: `vAlign="middle"`
380 |   - `hAlign` (veya `textAlign`): İçeriğin yatay hizalanması (hücre içindeki paragraflar bunu geçersiz kılabilir).
381 |   - `colspan`: Hücrenin yatay olarak kaç sütun boyunca birleşeceği.
382 |       * Örnek: `colspan="2"`
383 |   - `rowspan`: Hücrenin dikey olarak kaç satır boyunca birleşeceği.
384 |       * Örnek: `rowspan="3"`
385 |   - `borderTop`, `borderBottom`, `borderLeft`, `borderRight`: Her bir kenar için stil, renk, kalınlık.
386 |       * Örnek: `borderBottom="solid 1px #000000"`
387 |   - `paddingTop`, `paddingBottom`, `paddingLeft`, `paddingRight`: Hücre içi dolgu miktarları.
388 | 
389 | Örnek:
390 | 
391 | ```xml
392 | <row>
393 |   <cell width="150" height="50" bgColor="-256" vAlign="middle">
394 |     <paragraph Alignment="1">
395 |       <content startOffset="200" length="12" /> </paragraph>
396 |   </cell>
397 |   <cell colspan="2" vAlign="top">
398 |     <paragraph>
399 |       <content startOffset="212" length="25" /> </paragraph>
400 |   </cell>
401 | </row>
402 | <row>
403 |   <cell rowspan="2" bgColor="-16711681" vAlign="bottom">
404 |     <paragraph Alignment="2">
405 |       <content startOffset="237" length="22" /> </paragraph>
406 |   </cell>
407 |   <cell>
408 |     <paragraph>
409 |       <content startOffset="259" length="13" /> </paragraph>
410 |   </cell>
411 |   <cell>
412 |     <paragraph>
413 |       <content startOffset="272" length="13" /> </paragraph>
414 |   </cell>
415 | </row>
416 | <row>
417 |   <cell>
418 |     <paragraph>
419 |       <content startOffset="285" length="13" /> </paragraph>
420 |   </cell>
421 |   <cell>
422 |     <paragraph>
423 |       <content startOffset="298" length="13" /> </paragraph>
424 |   </cell>
425 | </row>
426 | ```
427 | 
428 | ### Sekme (`<tab>`)
429 | 
430 | `<tab>` elemanı bir sekme karakterini temsil eder:
431 | 
432 |   - `startOffset`, `length`: Ana `<content>` CDATA bloğundaki konumu. `length` genellikle 1'dir.
433 | 
434 | Örnek:
435 | 
436 | ```xml
437 | <paragraph>
438 |   <content startOffset="311" length="5" /> <tab startOffset="316" length="1" />
439 |   <content startOffset="317" length="10" /> </paragraph>
440 | ```
441 | 
442 | ### Boşluk (`<space>`)
443 | 
444 | `<space>` elemanı, genellikle `<content>` elemanları arasında ek bir boşluk (space karakteri) eklemek için kullanılır. `startOffset` ve `length` (genellikle 1) öznitelikleriyle ana CDATA bloğundaki bir boşluğu referans alabilir veya sadece anlamsal bir boşluk olarak var olabilir.
445 | 
446 | Örnek:
447 | 
448 | ```xml
449 | <paragraph>
450 |   <content startOffset="327" length="4" /> <space startOffset="331" length="1" /> <content startOffset="332" length="4" /> </paragraph>
451 | ```
452 | 
453 | ### Sayfa Sonu (`<page-break>`)
454 | 
455 | `<page-break>` elemanı, belgede o noktada yeni bir sayfaya geçilmesini zorlar. Genellikle içinde boş veya kısa bir `<paragraph>` ve `<content>` elemanı barındırabilir.
456 | 
457 | Örnek:
458 | 
459 | ```xml
460 | <page-break>
461 |   <paragraph>
462 |     <content startOffset="336" length="0" /> </paragraph>
463 | </page-break>
464 | ```
465 | 
466 | ### Alan (`<field>`) (Varsayımsal)
467 | 
468 | Eğer UDF şablonlama için kullanılıyorsa, `<elements>` içinde `<field>` adında özel bir eleman bulunabilir. Bu eleman, `<data>` bölümünden veya harici bir kaynaktan gelen veriyle doldurulacak yer tutucuları temsil eder. Öznitelikleri `<content>` elemanının alan özelliklerine benzer olabilir:
469 | 
470 |   - `name` (veya `fieldName`): Alanın benzersiz adı.
471 |   - `type` (veya `fieldType`): Alanın veri türü (örn: "text", "date", "image").
472 |   - `default`: Veri bulunamazsa gösterilecek varsayılan değer.
473 |   - Formatlama öznitelikleri (font, size, color vb.).
474 | 
475 | Örnek (tamamen varsayımsal):
476 | 
477 | ```xml
478 | <paragraph>
479 |   <content startOffset="350" length="10" /> <field name="MusteriAdi" type="text" startOffset="360" length="0" style="AlanStili" /> </paragraph>
480 | ```
481 | 
482 | **Not:** `<field>` elemanının varlığı ve yapısı UYAP sisteminin özel uygulamasına bağlıdır. Genellikle bu tür alanlar `<content>` elemanlarına eklenmiş özel özniteliklerle de (yukarıda `<content>` bölümünde belirtildiği gibi `fieldName` vb.) temsil edilebilir.
483 | 


--------------------------------------------------------------------------------
/udf_to_pdf.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import xml.etree.ElementTree as ET
  4 | from reportlab.lib.pagesizes import A4
  5 | from reportlab.pdfgen import canvas
  6 | from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
  7 | from reportlab.platypus import SimpleDocTemplate, Paragraph, Table, TableStyle, Spacer, Image, PageBreak
  8 | from reportlab.lib import colors
  9 | from reportlab.lib.units import mm, inch
 10 | import base64
 11 | import io
 12 | import zipfile
 13 | from reportlab.pdfbase import pdfmetrics
 14 | from reportlab.pdfbase.ttfonts import TTFont
 15 | from reportlab.lib.enums import TA_LEFT, TA_CENTER, TA_RIGHT, TA_JUSTIFY
 16 | 
# Register the four DejaVu Serif faces (regular, bold, italic, bold-italic);
# these fonts cover the Turkish characters used in UDF documents.
# NOTE(review): the .ttf files are referenced by bare file name and loaded at
# import time - presumably they must be in the working directory or on
# ReportLab's TrueType search path; confirm for the deployment environment.
pdfmetrics.registerFont(TTFont('DejaVuSerif', 'DejaVuSerif.ttf'))
pdfmetrics.registerFont(TTFont('DejaVuSerif-Bold', 'DejaVuSerif-Bold.ttf'))
pdfmetrics.registerFont(TTFont('DejaVuSerif-Italic', 'DejaVuSerif-Italic.ttf'))
pdfmetrics.registerFont(TTFont('DejaVuSerif-BoldItalic', 'DejaVuSerif-BoldItalic.ttf'))

# Tie the regular face to its bold / italic / bold-italic variants as one
# font family named 'DejaVuSerif'.
pdfmetrics.registerFontFamily('DejaVuSerif', normal='DejaVuSerif', bold='DejaVuSerif-Bold',
                             italic='DejaVuSerif-Italic', boldItalic='DejaVuSerif-BoldItalic')
 26 | 
 27 | def is_zip_file(file_path):
 28 |     """Check if the file is a valid ZIP file"""
 29 |     try:
 30 |         with zipfile.ZipFile(file_path, 'r') as z:
 31 |             return True
 32 |     except zipfile.BadZipFile:
 33 |         return False
 34 | 
 35 | def get_alignment_style(alignment_value):
 36 |     """Convert alignment value from XML to reportlab alignment constant"""
 37 |     if alignment_value == "1":
 38 |         return TA_CENTER
 39 |     elif alignment_value == "3":
 40 |         return TA_JUSTIFY
 41 |     elif alignment_value == "2":
 42 |         return TA_RIGHT
 43 |     else:
 44 |         return TA_LEFT
 45 | 
 46 | def convert_color(color_value):
 47 |     """Convert integer color value to reportlab color"""
 48 |     if color_value is None:
 49 |         return None
 50 |     
 51 |     try:
 52 |         # Convert from negative integer to positive hex
 53 |         color_int = int(color_value)
 54 |         if color_int < 0:
 55 |             color_int = 0xFFFFFFFF + color_int + 1
 56 |         
 57 |         # Extract RGB values
 58 |         r = (color_int >> 16) & 0xFF
 59 |         g = (color_int >> 8) & 0xFF
 60 |         b = color_int & 0xFF
 61 |         
 62 |         return colors.Color(r/255, g/255, b/255)
 63 |     except (ValueError, TypeError):
 64 |         return None
 65 | 
 66 | def process_background_image(bg_image_data, bg_image_source, output_file):
 67 |     """Process background image data and return Image object"""
 68 |     if bg_image_data:
 69 |         try:
 70 |             # Decode base64 image data
 71 |             image_bytes = base64.b64decode(bg_image_data)
 72 |             image_stream = io.BytesIO(image_bytes)
 73 |             
 74 |             # Create reportlab image
 75 |             img = Image(image_stream)
 76 |             return img
 77 |         except Exception as e:
 78 |             print(f"Error processing background image data: {e}")
 79 |     elif bg_image_source:
 80 |         # Try to load from source path if available
 81 |         try:
 82 |             # Check if the source path exists relative to the output file
 83 |             output_dir = os.path.dirname(output_file)
 84 |             # Normalize path
 85 |             source_path = bg_image_source.replace('/resources/', '')
 86 |             img_path = os.path.join(output_dir, source_path)
 87 |             
 88 |             if os.path.exists(img_path):
 89 |                 return Image(img_path)
 90 |             else:
 91 |                 print(f"Background image not found: {img_path}")
 92 |         except Exception as e:
 93 |             print(f"Error processing background image source: {e}")
 94 |     
 95 |     return None
 96 | 
 97 | def udf_to_pdf(udf_file, pdf_file):
 98 |     root = None
 99 |     
100 |     # Check if the file is a ZIP file
101 |     if is_zip_file(udf_file):
102 |         # Process as a ZIP file
103 |         with zipfile.ZipFile(udf_file, 'r') as z:
104 |             if 'content.xml' in z.namelist():
105 |                 with z.open('content.xml') as content_file:
106 |                     tree = ET.parse(content_file, parser=ET.XMLParser(encoding='utf-8'))
107 |                     root = tree.getroot()
108 |             else:
109 |                 print("The 'content.xml' file could not be found in the UDF file.")
110 |                 exit()
111 |     else:
112 |         # Process as an XML file directly
113 |         try:
114 |             tree = ET.parse(udf_file, parser=ET.XMLParser(encoding='utf-8'))
115 |             root = tree.getroot()
116 |         except ET.ParseError:
117 |             print(f"The file {udf_file} is neither a valid ZIP nor a valid XML file.")
118 |             exit()
119 | 
120 |     if root is None:
121 |         print("Failed to parse the file.")
122 |         exit()
123 | 
124 |     # Retrieve content text
125 |     content_element = root.find('content')
126 |     if content_element is not None:
127 |         content_text = content_element.text
128 |         if content_text.startswith('<![CDATA[') and content_text.endswith(']]>'):
129 |             content_text = content_text[9:-3]
130 |     else:
131 |         print("'content' could not be found in the XML.")
132 |         exit()
133 | 
134 |     # Extract page properties
135 |     properties_element = root.find('properties')
136 |     page_format = properties_element.find('pageFormat') if properties_element is not None else None
137 |     
138 |     # Get page margins
139 |     left_margin = float(page_format.get('leftMargin', '42.5')) if page_format is not None else 42.5
140 |     right_margin = float(page_format.get('rightMargin', '42.5')) if page_format is not None else 42.5
141 |     top_margin = float(page_format.get('topMargin', '42.5')) if page_format is not None else 42.5
142 |     bottom_margin = float(page_format.get('bottomMargin', '42.5')) if page_format is not None else 42.5
143 |     
144 |     # Get background image if available
145 |     bg_image = None
146 |     if properties_element is not None:
147 |         bg_image_elem = properties_element.find('bgImage')
148 |         if bg_image_elem is not None:
149 |             bg_image_data = bg_image_elem.get('bgImageData')
150 |             bg_image_source = bg_image_elem.get('bgImageSource')
151 |             bg_image = process_background_image(bg_image_data, bg_image_source, pdf_file)
152 | 
153 |     # Process the 'elements' section
154 |     elements_element = root.find('elements')
155 |     if elements_element is not None:
156 |         # Create the PDF document with specified margins
157 |         pdf = SimpleDocTemplate(
158 |             pdf_file, 
159 |             pagesize=A4,
160 |             leftMargin=left_margin,
161 |             rightMargin=right_margin,
162 |             topMargin=top_margin,
163 |             bottomMargin=bottom_margin
164 |         )
165 |         
166 |         # Create elements list for the PDF
167 |         pdf_elements = []
168 |         styles = getSampleStyleSheet()
169 |         
170 |         # Define a base style that supports Turkish characters - default to DejaVuSerif
171 |         base_style = ParagraphStyle(
172 |             'CustomNormal', 
173 |             parent=styles['Normal'],
174 |             fontName='DejaVuSerif',  # Setting DejaVuSerif as default font
175 |             encoding='utf-8'
176 |         )
177 |         
178 |         # Process styles from the XML
179 |         styles_element = root.find('styles')
180 |         if styles_element is not None:
181 |             for style_elem in styles_element.findall('style'):
182 |                 style_name = style_elem.get('name', '')
183 |                 style_family = style_elem.get('family', 'DejaVuSerif')
184 |                 style_size = float(style_elem.get('size', '12'))
185 |                 style_bold = style_elem.get('bold', 'false') == 'true'
186 |                 style_italic = style_elem.get('italic', 'false') == 'true'
187 |                 style_foreground = convert_color(style_elem.get('foreground'))
188 |                 
189 |                 # Create the style - always use DejaVuSerif
190 |                 style_family = 'DejaVuSerif'
191 |                     
192 |                 custom_style = ParagraphStyle(
193 |                     style_name,
194 |                     parent=base_style,
195 |                     fontName=style_family,
196 |                     fontSize=style_size,
197 |                     textColor=style_foreground if style_foreground else base_style.textColor
198 |                 )
199 |                 
200 |                 # Set bold and italic based on font family
201 |                 if style_bold and style_italic:
202 |                     custom_style.fontName = f"{style_family}-BoldItalic"
203 |                 elif style_bold:
204 |                     custom_style.fontName = f"{style_family}-Bold"
205 |                 elif style_italic:
206 |                     custom_style.fontName = f"{style_family}-Italic"
207 | 
208 |         # Get header and footer elements
209 |         header_element = elements_element.find('header')
210 |         footer_element = elements_element.find('footer')
211 |         
212 |         # Function to process a text block and apply formatting
213 |         def process_text_block(content_elem, current_style):
214 |             text = ""
215 |             
216 |             # Get basic attributes
217 |             start_offset = int(content_elem.get('startOffset', '0'))
218 |             length = int(content_elem.get('length', '0'))
219 |             text_content = content_text[start_offset:start_offset+length]
220 |             
221 |             # Get formatting attributes
222 |             bold = content_elem.get('bold', 'false') == 'true'
223 |             italic = content_elem.get('italic', 'false') == 'true'
224 |             underline = content_elem.get('underline', 'false') == 'true'
225 |             family = content_elem.get('family')
226 |             size = content_elem.get('size')
227 |             foreground = convert_color(content_elem.get('foreground'))
228 |             
229 |             # Apply text formatting - always use DejaVuSerif regardless of what's specified
230 |             if family:
231 |                 # Ignore the family from XML and always use DejaVuSerif
232 |                 current_style.fontName = 'DejaVuSerif'
233 |             if size:
234 |                 current_style.fontSize = float(size)
235 |             if foreground:
236 |                 current_style.textColor = foreground
237 |             
238 |             # Apply emphasis formatting
239 |             formatted_text = text_content
240 |             if bold and italic and underline:
241 |                 formatted_text = f"<u><b><i>{formatted_text}</i></b></u>"
242 |             elif bold and italic:
243 |                 formatted_text = f"<b><i>{formatted_text}</i></b>"
244 |             elif bold and underline:
245 |                 formatted_text = f"<u><b>{formatted_text}</b></u>"
246 |             elif italic and underline:
247 |                 formatted_text = f"<u><i>{formatted_text}</i></u>"
248 |             elif bold:
249 |                 formatted_text = f"<b>{formatted_text}</b>"
250 |             elif italic:
251 |                 formatted_text = f"<i>{formatted_text}</i>"
252 |             elif underline:
253 |                 formatted_text = f"<u>{formatted_text}</u>"
254 |             
255 |             return formatted_text
256 |         
257 |         # Function to process a paragraph element
258 |         def process_paragraph(para_elem, content_buffer, in_header_footer=False):
259 |             # Get paragraph alignment
260 |             alignment = para_elem.get('Alignment', '0')
261 |             alignment_style = get_alignment_style(alignment)
262 |             
263 |             # Get paragraph indentation
264 |             left_indent = float(para_elem.get('LeftIndent', '0'))
265 |             right_indent = float(para_elem.get('RightIndent', '0'))
266 |             first_line_indent = float(para_elem.get('FirstLineIndent', '0'))
267 |             line_spacing = float(para_elem.get('LineSpacing', '1.2'))
268 |             
269 |             # Get paragraph font family - always use DejaVuSerif regardless of what's in the XML
270 |             family = 'DejaVuSerif'
271 |             size = float(para_elem.get('size', '12'))
272 |             
273 |             # Create a custom style for this paragraph
274 |             para_style = ParagraphStyle(
275 |                 f'Style{alignment}',
276 |                 parent=base_style,
277 |                 alignment=alignment_style,
278 |                 leftIndent=left_indent,
279 |                 rightIndent=right_indent,
280 |                 firstLineIndent=first_line_indent,
281 |                 fontName=family,
282 |                 fontSize=size,
283 |                 leading=size * line_spacing  # Leading is the line spacing
284 |             )
285 |             
286 |             # Process the paragraph content
287 |             paragraph_text = ''
288 |             for child in para_elem:
289 |                 if child.tag == 'content':
290 |                     paragraph_text += process_text_block(child, para_style)
291 |                 elif child.tag == 'field':
292 |                     # Process field element (labels like DAVACI, VEKİLİ, etc.)
293 |                     field_name = child.get('fieldName', '')
294 |                     
295 |                     # Get the text from the content buffer if startOffset and length are provided
296 |                     if child.get('startOffset') and child.get('length'):
297 |                         start_offset = int(child.get('startOffset', '0'))
298 |                         length = int(child.get('length', '0'))
299 |                         field_text = content_buffer[start_offset:start_offset+length]
300 |                     else:
301 |                         # Use the fieldName as fallback
302 |                         field_text = field_name
303 |                     
304 |                     # Apply styling 
305 |                     bold = child.get('bold', 'false') == 'true'
306 |                     italic = child.get('italic', 'false') == 'true'
307 |                     underline = child.get('underline', 'false') == 'true'
308 |                     
309 |                     # Format text with style
310 |                     if bold and italic and underline:
311 |                         paragraph_text += f"<u><b><i>{field_text}</i></b></u>"
312 |                     elif bold and italic:
313 |                         paragraph_text += f"<b><i>{field_text}</i></b>"
314 |                     elif bold and underline:
315 |                         paragraph_text += f"<u><b>{field_text}</b></u>"
316 |                     elif italic and underline:
317 |                         paragraph_text += f"<u><i>{field_text}</i></u>"
318 |                     elif bold:
319 |                         paragraph_text += f"<b>{field_text}</b>"
320 |                     elif italic:
321 |                         paragraph_text += f"<i>{field_text}</i>"
322 |                     elif underline:
323 |                         paragraph_text += f"<u>{field_text}</u>"
324 |                     else:
325 |                         paragraph_text += field_text
326 |                 elif child.tag == 'space':
327 |                     paragraph_text += ' '
328 |                 elif child.tag == 'image':
329 |                     # Add the image
330 |                     image_data = child.get('imageData')
331 |                     if image_data:
332 |                         try:
333 |                             # Decode base64 image data
334 |                             image_bytes = base64.b64decode(image_data)
335 |                             image_stream = io.BytesIO(image_bytes)
336 |                             
337 |                             # Create reportlab image
338 |                             img = Image(image_stream)
339 |                             
340 |                             # Set a reasonable width/height if not specified
341 |                             if not hasattr(img, 'drawWidth') or not img.drawWidth:
342 |                                 img.drawWidth = 100
343 |                             if not hasattr(img, 'drawHeight') or not img.drawHeight:
344 |                                 img.drawHeight = 50
345 |                             
346 |                             # For images in paragraphs, we'll handle them specially
347 |                             if not in_header_footer:
348 |                                 return Paragraph(paragraph_text, para_style), img
349 |                         except Exception as e:
350 |                             print(f"Error processing image: {e}")
351 |                             # Add a placeholder text instead
352 |                             paragraph_text += "[GÖRSEL]"
353 |             
354 |             # Return the paragraph
355 |             return Paragraph(paragraph_text, para_style), None
356 |         
357 |         # Define header and footer
358 |         header_paragraphs = []
359 |         footer_paragraphs = []
360 |         
361 |         if header_element is not None:
362 |             header_bg_color = convert_color(header_element.get('background'))
363 |             header_fg_color = convert_color(header_element.get('foreground'))
364 |             
365 |             for para in header_element.findall('paragraph'):
366 |                 header_para, _ = process_paragraph(para, content_text, True)
367 |                 header_paragraphs.append(header_para)
368 |         
369 |         if footer_element is not None:
370 |             footer_bg_color = convert_color(footer_element.get('background'))
371 |             footer_fg_color = convert_color(footer_element.get('foreground'))
372 |             
373 |             for para in footer_element.findall('paragraph'):
374 |                 footer_para, _ = process_paragraph(para, content_text, True)
375 |                 footer_paragraphs.append(footer_para)
376 |         
377 |         # Create a function to draw the header and footer on each page
378 |         def add_header_footer(canvas, doc):
379 |             canvas.saveState()
380 |             
381 |             # Draw header
382 |             if header_paragraphs:
383 |                 # Draw header background if color specified
384 |                 if header_bg_color:
385 |                     canvas.setFillColor(header_bg_color)
386 |                     canvas.rect(
387 |                         doc.leftMargin, 
388 |                         doc.height + doc.topMargin - 20, 
389 |                         doc.width, 
390 |                         20, 
391 |                         fill=True, 
392 |                         stroke=False
393 |                     )
394 |                 
395 |                 # Draw header text
396 |                 for i, para in enumerate(header_paragraphs):
397 |                     w, h = para.wrap(doc.width, doc.topMargin)
398 |                     para.drawOn(canvas, doc.leftMargin, doc.height + doc.topMargin - 15 - i*h)
399 |             
400 |             # Draw footer
401 |             if footer_paragraphs:
402 |                 # Draw footer background if color specified
403 |                 if footer_bg_color:
404 |                     canvas.setFillColor(footer_bg_color)
405 |                     canvas.rect(
406 |                         doc.leftMargin, 
407 |                         doc.bottomMargin - 20, 
408 |                         doc.width, 
409 |                         20, 
410 |                         fill=True, 
411 |                         stroke=False
412 |                     )
413 |                 
414 |                 # Draw footer text
415 |                 for i, para in enumerate(footer_paragraphs):
416 |                     w, h = para.wrap(doc.width, doc.bottomMargin)
417 |                     para.drawOn(canvas, doc.leftMargin, doc.bottomMargin - 15 - i*h)
418 |             
419 |             # Draw background image if available
420 |             if bg_image:
421 |                 # Scale image to fit page with margins
422 |                 page_width = doc.width
423 |                 page_height = doc.height
424 |                 
425 |                 # Preserve aspect ratio
426 |                 img_ratio = bg_image.imageWidth / bg_image.imageHeight
427 |                 page_ratio = page_width / page_height
428 |                 
429 |                 if img_ratio > page_ratio:
430 |                     # Image is wider than page
431 |                     bg_image.drawWidth = page_width
432 |                     bg_image.drawHeight = page_width / img_ratio
433 |                 else:
434 |                     # Image is taller than page
435 |                     bg_image.drawHeight = page_height
436 |                     bg_image.drawWidth = page_height * img_ratio
437 |                 
438 |                 # Center the image
439 |                 x_offset = doc.leftMargin + (page_width - bg_image.drawWidth) / 2
440 |                 y_offset = doc.bottomMargin + (page_height - bg_image.drawHeight) / 2
441 |                 
442 |                 # Draw the image with transparency
443 |                 canvas.saveState()
444 |                 canvas.setFillAlpha(0.1)  # Set transparency
445 |                 bg_image.drawOn(canvas, x_offset, y_offset)
446 |                 canvas.restoreState()
447 |             
448 |             canvas.restoreState()
449 |         
450 |         content_buffer = content_text
451 |         
452 |         # Process each element in the XML
453 |         for elem in elements_element:
454 |             if elem.tag == 'paragraph':
455 |                 para, img = process_paragraph(elem, content_buffer)
456 |                 pdf_elements.append(para)
457 |                 if img:
458 |                     pdf_elements.append(img)
459 |                 pdf_elements.append(Spacer(1, 5))
460 |             elif elem.tag == 'page-break':
461 |                 pdf_elements.append(PageBreak())
462 |             elif elem.tag == 'table':
463 |                 # Create the table
464 |                 table_data = []
465 |                 rows = elem.findall('row')
466 |                 for row in rows:
467 |                     row_data = []
468 |                     cells = row.findall('cell')
469 |                     for cell in cells:
470 |                         # Process the cell content
471 |                         paragraphs = cell.findall('paragraph')
472 |                         cell_paragraphs = []
473 |                         
474 |                         for para in paragraphs:
475 |                             cell_para, cell_img = process_paragraph(para, content_buffer)
476 |                             cell_paragraphs.append(cell_para)
477 |                             if cell_img:
478 |                                 cell_paragraphs.append(cell_img)
479 |                         
480 |                         # Check if we have any paragraphs
481 |                         if cell_paragraphs:
482 |                             row_data.append(cell_paragraphs)
483 |                         else:
484 |                             # If no content, add an empty Paragraph
485 |                             row_data.append(Paragraph("", base_style))
486 |                     table_data.append(row_data)
487 |                 
488 |                 # Get table properties
489 |                 col_count = int(elem.get('columnCount', '1'))
490 |                 col_spans = elem.get('columnSpans', '').split(',')
491 |                 row_spans = elem.get('rowSpans', '').split(',')
492 |                 border_style = elem.get('border', 'borderCell')
493 |                 
494 |                 # Set column widths if available
495 |                 col_widths = None
496 |                 if col_spans and len(col_spans) == col_count:
497 |                     try:
498 |                         col_widths = [float(span) for span in col_spans]
499 |                     except ValueError:
500 |                         pass
501 |                 
502 |                 # Set the table style
503 |                 table_style = [
504 |                     ('VALIGN', (0,0), (-1,-1), 'TOP'),
505 |                     ('LEFTPADDING', (0,0), (-1,-1), 3),
506 |                     ('RIGHTPADDING', (0,0), (-1,-1), 3),
507 |                     ('TOPPADDING', (0,0), (-1,-1), 3),
508 |                     ('BOTTOMPADDING', (0,0), (-1,-1), 3),
509 |                 ]
510 |                 
511 |                 # Add grid/border based on style
512 |                 if border_style == 'borderCell' or border_style == 'border':
513 |                     table_style.append(('GRID', (0,0), (-1,-1), 1, colors.black))
514 |                 elif border_style == 'borderOuter':
515 |                     table_style.append(('BOX', (0,0), (-1,-1), 1, colors.black))
516 |                 
517 |                 table = Table(table_data, colWidths=col_widths)
518 |                 table.setStyle(TableStyle(table_style))
519 |                 pdf_elements.append(table)
520 |                 pdf_elements.append(Spacer(1, 5))
521 |             # Skip header and footer here as they're handled separately
522 |             elif elem.tag not in ['header', 'footer']:
523 |                 pass
524 |         
525 |         # Build the PDF document with header and footer
526 |         pdf.build(pdf_elements, onFirstPage=add_header_footer, onLaterPages=add_header_footer)
527 |         print(f"PDF file created: {pdf_file}")
528 |     else:
529 |         print("'elements' could not be found in the XML.")
530 | 
def main():
    """Command-line entry point: convert the UDF file named in argv[1] to PDF.

    The output path is the input path with its extension replaced by
    ``.pdf``. A missing argument, a nonexistent input file, or an input
    whose extension is not ``.udf`` prints a message and terminates the
    process with exit status 1.

    Raises:
        SystemExit: with code 1 on any of the usage errors above.
    """
    # sys.exit() is used instead of the bare exit() helper: exit() is only
    # injected by the `site` module for interactive use, and a non-zero
    # status lets calling scripts detect the failure.
    if len(sys.argv) < 2:
        print("Usage: python udf_to_pdf.py input.udf")
        sys.exit(1)

    udf_file = sys.argv[1]

    if not os.path.isfile(udf_file):
        print(f"Input file not found: {udf_file}")
        sys.exit(1)

    filename, ext = os.path.splitext(udf_file)

    # Extension check is case-insensitive (".UDF" is accepted).
    if ext.lower() != '.udf':
        print("Please provide a .udf file.")
        sys.exit(1)

    udf_to_pdf(udf_file, filename + '.pdf')


if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/udf_to_docx.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import xml.etree.ElementTree as ET
  4 | from docx import Document
  5 | from docx.shared import Pt, RGBColor, Inches
  6 | from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_UNDERLINE
  7 | from docx.oxml import parse_xml, OxmlElement
  8 | from docx.oxml.ns import nsdecls, qn
  9 | from docx.enum.section import WD_ORIENT
 10 | import base64
 11 | import io
 12 | import zipfile
 13 | 
 14 | def is_zip_file(file_path):
 15 |     """Check if the file is a valid ZIP file"""
 16 |     try:
 17 |         with zipfile.ZipFile(file_path, 'r') as z:
 18 |             return True
 19 |     except zipfile.BadZipFile:
 20 |         return False
 21 | 
 22 | def get_alignment_style(alignment_value):
 23 |     """Convert alignment value from XML to Word alignment constant"""
 24 |     if alignment_value == "1":
 25 |         return WD_ALIGN_PARAGRAPH.CENTER
 26 |     elif alignment_value == "3":
 27 |         return WD_ALIGN_PARAGRAPH.JUSTIFY
 28 |     elif alignment_value == "2":
 29 |         return WD_ALIGN_PARAGRAPH.RIGHT
 30 |     else:
 31 |         return WD_ALIGN_PARAGRAPH.LEFT
 32 | 
 33 | def convert_color(color_value):
 34 |     """Convert integer color value to RGBColor and return RGB values as a tuple"""
 35 |     if color_value is None:
 36 |         return None
 37 |     
 38 |     try:
 39 |         # Convert from negative integer to positive hex
 40 |         color_int = int(color_value)
 41 |         if color_int < 0:
 42 |             color_int = 0xFFFFFFFF + color_int + 1
 43 |         
 44 |         # Extract RGB values
 45 |         r = (color_int >> 16) & 0xFF
 46 |         g = (color_int >> 8) & 0xFF
 47 |         b = color_int & 0xFF
 48 |         
 49 |         # Return both the RGBColor object and the RGB values
 50 |         return (RGBColor(r, g, b), (r, g, b))
 51 |     except (ValueError, TypeError):
 52 |         return None
 53 | 
 54 | def add_page_number(paragraph):
 55 |     """Add a page number field to the paragraph"""
 56 |     run = paragraph.add_run()
 57 |     fldChar1 = OxmlElement('w:fldChar')
 58 |     fldChar1.set(qn('w:fldCharType'), 'begin')
 59 |     run._r.append(fldChar1)
 60 |     
 61 |     instrText = OxmlElement('w:instrText')
 62 |     instrText.set(qn('xml:space'), 'preserve')
 63 |     instrText.text = "PAGE"
 64 |     run._r.append(instrText)
 65 |     
 66 |     fldChar2 = OxmlElement('w:fldChar')
 67 |     fldChar2.set(qn('w:fldCharType'), 'end')
 68 |     run._r.append(fldChar2)
 69 | 
 70 | def set_cell_background(cell, color_info):
 71 |     """Set cell background color using direct XML manipulation"""
 72 |     color_obj, rgb_values = color_info
 73 |     shading_elm = OxmlElement('w:shd')
 74 |     # Convert RGB to hex
 75 |     hex_color = f"{rgb_values[0]:02X}{rgb_values[1]:02X}{rgb_values[2]:02X}"
 76 |     shading_elm.set(qn('w:fill'), hex_color)
 77 |     cell._tc.get_or_add_tcPr().append(shading_elm)
 78 | 
 79 | def process_background_image(document, bg_image_data, bg_image_source, output_file):
 80 |     """Process background image data and add to document background"""
 81 |     if bg_image_data:
 82 |         try:
 83 |             # Decode base64 image data
 84 |             image_bytes = base64.b64decode(bg_image_data)
 85 |             
 86 |             # Save the image to a file next to the output document
 87 |             temp_img_path = os.path.join(os.path.dirname(output_file), os.path.splitext(os.path.basename(output_file))[0] + "_background.png")
 88 |             with open(temp_img_path, "wb") as img_file:
 89 |                 img_file.write(image_bytes)
 90 |             
 91 |             # Unfortunately python-docx doesn't directly support setting background images
 92 |             # This requires OOXML manipulation, which is beyond the scope of this script
 93 |             print(f"Background image saved to {temp_img_path}. Please manually set it as document background in Word.")
 94 |             return True
 95 |         except Exception as e:
 96 |             print(f"Error processing background image data: {e}")
 97 |     elif bg_image_source:
 98 |         print(f"Background image source path: {bg_image_source}. Please manually set it as document background in Word.")
 99 |     return False
100 | 
101 | def udf_to_docx(udf_file, docx_file):
102 |     root = None
103 |     
104 |     # Check if the file is a ZIP file
105 |     if is_zip_file(udf_file):
106 |         # Process as a ZIP file
107 |         with zipfile.ZipFile(udf_file, 'r') as z:
108 |             if 'content.xml' in z.namelist():
109 |                 with z.open('content.xml') as content_file:
110 |                     tree = ET.parse(content_file, parser=ET.XMLParser(encoding='utf-8'))
111 |                     root = tree.getroot()
112 |             else:
113 |                 print("The 'content.xml' file could not be found in the UDF file.")
114 |                 exit()
115 |     else:
116 |         # Process as an XML file directly
117 |         try:
118 |             tree = ET.parse(udf_file, parser=ET.XMLParser(encoding='utf-8'))
119 |             root = tree.getroot()
120 |         except ET.ParseError:
121 |             print(f"The file {udf_file} is neither a valid ZIP nor a valid XML file.")
122 |             exit()
123 | 
124 |     if root is None:
125 |         print("Failed to parse the file.")
126 |         exit()
127 | 
128 |     # Create a new Word document
129 |     document = Document()
130 |     
131 |     # Ensure default headers and footers are created for all sections
132 |     for section in document.sections:
133 |         section.different_first_page = False
134 |         section.header.is_linked_to_previous = False
135 |         section.footer.is_linked_to_previous = False
136 | 
137 |     # Retrieve content text
138 |     content_element = root.find('content')
139 |     if content_element is not None:
140 |         content_text = content_element.text
141 |         if content_text.startswith('<![CDATA[') and content_text.endswith(']]>'):
142 |             content_text = content_text[9:-3]
143 |     else:
144 |         print("'content' could not be found in the XML.")
145 |         exit()
146 | 
147 |     # Extract page properties
148 |     properties_element = root.find('properties')
149 |     page_format = properties_element.find('pageFormat') if properties_element is not None else None
150 |     
151 |     # Get page margins
152 |     if page_format is not None:
153 |         left_margin = float(page_format.get('leftMargin', '42.5')) / 72 * Inches(1).pt  # Convert to Word points
154 |         right_margin = float(page_format.get('rightMargin', '42.5')) / 72 * Inches(1).pt
155 |         top_margin = float(page_format.get('topMargin', '42.5')) / 72 * Inches(1).pt
156 |         bottom_margin = float(page_format.get('bottomMargin', '42.5')) / 72 * Inches(1).pt
157 |         
158 |         # Set page margins for each section
159 |         for section in document.sections:
160 |             section.left_margin = Pt(left_margin)
161 |             section.right_margin = Pt(right_margin)
162 |             section.top_margin = Pt(top_margin)
163 |             section.bottom_margin = Pt(bottom_margin)
164 |             
165 |             # Set page orientation
166 |             orientation = page_format.get('paperOrientation', '1')
167 |             if orientation == '2':  # Landscape
168 |                 section.orientation = WD_ORIENT.LANDSCAPE
169 |             else:  # Portrait
170 |                 section.orientation = WD_ORIENT.PORTRAIT
171 |     
172 |     # Get background image if available
173 |     if properties_element is not None:
174 |         bg_image_elem = properties_element.find('bgImage')
175 |         if bg_image_elem is not None:
176 |             bg_image_data = bg_image_elem.get('bgImageData')
177 |             bg_image_source = bg_image_elem.get('bgImageSource')
178 |             process_background_image(document, bg_image_data, bg_image_source, docx_file)
179 | 
180 |     # Process the 'elements' section
181 |     elements_element = root.find('elements')
182 |     if elements_element is not None:
183 |         # Get header and footer elements
184 |         header_element = elements_element.find('header')
185 |         footer_element = elements_element.find('footer')
186 |         
187 |         # Process header
188 |         if header_element is not None:
189 |             # Get the header from the first section
190 |             section = document.sections[0]
191 |             header = section.header
192 |             
193 |             # Clear existing header paragraphs
194 |             for p in header.paragraphs:
195 |                 p._element.getparent().remove(p._element)
196 |                 p._p = None
197 |                 p._element = None
198 |             
199 |             # Create new header paragraph
200 |             header_para = header.add_paragraph()
201 |             
202 |             # Set header background color
203 |             header_color_result = convert_color(header_element.get('background'))
204 |             if header_color_result:
205 |                 header_bg_color, rgb_values = header_color_result
206 |                 # Save background color info for manual formatting
207 |                 print(f"Header background color: RGB({rgb_values[0]}, {rgb_values[1]}, {rgb_values[2]}) - Please set it manually in Word.")
208 |             
209 |             # Process header paragraphs
210 |             for para_elem in header_element.findall('paragraph'):
211 |                 if para_elem is not header_element.findall('paragraph')[0]:
212 |                     header_para = header.add_paragraph()
213 |                 
214 |                 # Set alignment
215 |                 alignment = para_elem.get('Alignment', '0')
216 |                 header_para.alignment = get_alignment_style(alignment)
217 |                 
218 |                 # Process content
219 |                 for child in para_elem:
220 |                     if child.tag == 'content':
221 |                         start_offset = int(child.get('startOffset', '0'))
222 |                         length = int(child.get('length', '0'))
223 |                         text = content_text[start_offset:start_offset+length]
224 |                         
225 |                         run = header_para.add_run(text)
226 |                         
227 |                         # Always use DejaVuSerif as default font
228 |                         run.font.name = "DejaVuSerif"
229 |                         
230 |                         # Set font size
231 |                         size = child.get('size')
232 |                         if size:
233 |                             run.font.size = Pt(float(size))
234 |                         
235 |                         # Set formatting
236 |                         bold = child.get('bold', 'false') == 'true'
237 |                         italic = child.get('italic', 'false') == 'true'
238 |                         underline = child.get('underline', 'false') == 'true'
239 |                         
240 |                         run.bold = bold
241 |                         run.italic = italic
242 |                         if underline:
243 |                             run.underline = WD_UNDERLINE.SINGLE
244 |                         
245 |                         # Set color
246 |                         foreground_result = convert_color(child.get('foreground'))
247 |                         if foreground_result:
248 |                             # Extract just the RGBColor object, not the tuple
249 |                             foreground = foreground_result[0]
250 |                             run.font.color.rgb = foreground
251 |         
252 |         # Process footer
253 |         if footer_element is not None:
254 |             # Get the footer from the first section
255 |             section = document.sections[0]
256 |             footer = section.footer
257 |             
258 |             # Clear existing footer paragraphs
259 |             for p in footer.paragraphs:
260 |                 p._element.getparent().remove(p._element)
261 |                 p._p = None
262 |                 p._element = None
263 |             
264 |             # Create new footer paragraph
265 |             footer_para = footer.add_paragraph()
266 |             
267 |             # Process footer background color
268 |             footer_color_result = convert_color(footer_element.get('background'))
269 |             if footer_color_result:
270 |                 footer_bg_color, rgb_values = footer_color_result
271 |                 print(f"Footer background color: RGB({rgb_values[0]}, {rgb_values[1]}, {rgb_values[2]}) - Please set it manually in Word.")
272 |             
273 |             # Process footer paragraphs
274 |             for para_elem in footer_element.findall('paragraph'):
275 |                 if para_elem is not footer_element.findall('paragraph')[0]:
276 |                     footer_para = footer.add_paragraph()
277 |                 
278 |                 # Set alignment
279 |                 alignment = para_elem.get('Alignment', '0')
280 |                 footer_para.alignment = get_alignment_style(alignment)
281 |                 
282 |                 # Process content
283 |                 for child in para_elem:
284 |                     if child.tag == 'content':
285 |                         start_offset = int(child.get('startOffset', '0'))
286 |                         length = int(child.get('length', '0'))
287 |                         text = content_text[start_offset:start_offset+length]
288 |                         
289 |                         run = footer_para.add_run(text)
290 |                         
291 |                         # Always use DejaVuSerif as default font
292 |                         run.font.name = "DejaVuSerif"
293 |                         
294 |                         # Set font size
295 |                         size = child.get('size')
296 |                         if size:
297 |                             run.font.size = Pt(float(size))
298 |                         
299 |                         # Set formatting
300 |                         bold = child.get('bold', 'false') == 'true'
301 |                         italic = child.get('italic', 'false') == 'true'
302 |                         underline = child.get('underline', 'false') == 'true'
303 |                         
304 |                         run.bold = bold
305 |                         run.italic = italic
306 |                         if underline:
307 |                             run.underline = WD_UNDERLINE.SINGLE
308 |                         
309 |                         # Set color
310 |                         foreground_result = convert_color(child.get('foreground'))
311 |                         if foreground_result:
312 |                             # Extract just the RGBColor object, not the tuple
313 |                             foreground = foreground_result[0]
314 |                             run.font.color.rgb = foreground
315 |                             
316 |             # Add page number if needed (optional)
317 |             # This can be uncommented if page numbers are required in the footer
318 |             # add_page_number(footer_para)
319 |         
320 |         # Process each element in the document body
321 |         for elem in elements_element:
322 |             if elem.tag == 'paragraph':
323 |                 # Create the paragraph
324 |                 paragraph = document.add_paragraph()
325 | 
326 |                 # Set paragraph alignment
327 |                 alignment = elem.get('Alignment', '0')
328 |                 paragraph.alignment = get_alignment_style(alignment)
329 |                 
330 |                 # Set paragraph indentation
331 |                 left_indent = elem.get('LeftIndent')
332 |                 right_indent = elem.get('RightIndent')
333 |                 first_line_indent = elem.get('FirstLineIndent')
334 |                 
335 |                 if left_indent:
336 |                     paragraph.paragraph_format.left_indent = Pt(float(left_indent))
337 |                 if right_indent:
338 |                     paragraph.paragraph_format.right_indent = Pt(float(right_indent))
339 |                 if first_line_indent:
340 |                     paragraph.paragraph_format.first_line_indent = Pt(float(first_line_indent))
341 |                 
342 |                 # Set line spacing
343 |                 line_spacing = elem.get('LineSpacing')
344 |                 if line_spacing:
345 |                     paragraph.paragraph_format.line_spacing = float(line_spacing)
346 | 
347 |                 # Process the paragraph content
348 |                 for child in elem:
349 |                     if child.tag == 'content':
350 |                         # Get and format the text
351 |                         start_offset = int(child.get('startOffset', '0'))
352 |                         length = int(child.get('length', '0'))
353 |                         text = content_text[start_offset:start_offset+length]
354 | 
355 |                         run = paragraph.add_run(text)
356 | 
357 |                         # Always use DejaVuSerif as font
358 |                         run.font.name = "DejaVuSerif"
359 | 
360 |                         # Set the font size
361 |                         size = child.get('size')
362 |                         if size:
363 |                             run.font.size = Pt(float(size))
364 | 
365 |                         # Set text formatting
366 |                         bold = child.get('bold', 'false') == 'true'
367 |                         italic = child.get('italic', 'false') == 'true'
368 |                         underline = child.get('underline', 'false') == 'true'
369 |                         
370 |                         run.bold = bold
371 |                         run.italic = italic
372 |                         if underline:
373 |                             run.underline = WD_UNDERLINE.SINGLE
374 | 
375 |                         # Set the color
376 |                         foreground_result = convert_color(child.get('foreground'))
377 |                         if foreground_result:
378 |                             # Extract just the RGBColor object, not the tuple
379 |                             foreground = foreground_result[0]
380 |                             run.font.color.rgb = foreground
381 | 
382 |                     elif child.tag == 'field':
383 |                         # Process field element (labels like DAVACI, VEKİLİ, etc.)
384 |                         field_name = child.get('fieldName', '')
385 |                         
386 |                         # Get the text from the content buffer if startOffset and length are provided
387 |                         if child.get('startOffset') and child.get('length'):
388 |                             start_offset = int(child.get('startOffset', '0'))
389 |                             length = int(child.get('length', '0'))
390 |                             field_text = content_text[start_offset:start_offset+length]
391 |                         else:
392 |                             # Use the fieldName as fallback
393 |                             field_text = field_name
394 |                         
395 |                         run = paragraph.add_run(field_text)
396 |                         
397 |                         # Always use DejaVuSerif as font
398 |                         run.font.name = "DejaVuSerif"
399 |                         
400 |                         # Set formatting based on attributes
401 |                         run.bold = child.get('bold', 'false') == 'true'
402 |                         run.italic = child.get('italic', 'false') == 'true'
403 |                         if child.get('underline', 'false') == 'true':
404 |                             run.underline = WD_UNDERLINE.SINGLE
405 |                         
406 |                         # Set color if available
407 |                         foreground_result = convert_color(child.get('foreground'))
408 |                         if foreground_result:
409 |                             # Extract just the RGBColor object, not the tuple
410 |                             foreground = foreground_result[0]
411 |                             run.font.color.rgb = foreground
412 |                             
413 |                     elif child.tag == 'space':
414 |                         # Add a space
415 |                         run = paragraph.add_run(" ")
416 |                         
417 |                     elif child.tag == 'image':
418 |                         # Add an image
419 |                         image_data = child.get('imageData')
420 |                         if image_data:
421 |                             image_bytes = base64.b64decode(image_data)
422 |                             image_stream = io.BytesIO(image_bytes)
423 |                             run = paragraph.add_run()
424 |                             run.add_picture(image_stream)
425 |                             
426 |             elif elem.tag == 'page-break':
427 |                 # Add page break
428 |                 document.add_page_break()
429 |                 
430 |             elif elem.tag == 'table':
431 |                 # Create the table
432 |                 column_count = int(elem.get('columnCount', '1'))
433 |                 rows = elem.findall('row')
434 |                 
435 |                 # Get column widths if specified
436 |                 col_widths = []
437 |                 col_spans = elem.get('columnSpans', '')
438 |                 if col_spans:
439 |                     try:
440 |                         col_spans_list = col_spans.split(',')
441 |                         if len(col_spans_list) == column_count:
442 |                             for span in col_spans_list:
443 |                                 col_widths.append(Pt(float(span)))
444 |                     except (ValueError, IndexError):
445 |                         col_widths = []
446 |                 
447 |                 # Create table
448 |                 table = document.add_table(rows=len(rows), cols=column_count)
449 |                 
450 |                 # Set border style
451 |                 border_style = elem.get('border', 'borderCell')
452 |                 if border_style in ['borderCell', 'border']:
453 |                     # Add borders to all cells
454 |                     table.style = 'Table Grid'
455 |                 elif border_style == 'borderOuter':
456 |                     # Only outer borders
457 |                     table.style = 'Table Grid'
458 |                     # Would need more complex XML manipulation to properly implement 'borderOuter'
459 |                     
460 |                 # Process table rows and cells
461 |                 for row_idx, row in enumerate(rows):
462 |                     # Set row height if specified
463 |                     row_height = row.get('height_min')
464 |                     if row_height:
465 |                         table.rows[row_idx].height = Pt(float(row_height) * 72)  # Convert to points
466 |                         
467 |                     cells = row.findall('cell')
468 |                     for col_idx, cell in enumerate(cells):
469 |                         # Ensure we don't exceed column count
470 |                         if col_idx >= column_count:
471 |                             continue
472 |                             
473 |                         # Get the table cell
474 |                         table_cell = table.rows[row_idx].cells[col_idx]
475 |                         
476 |                         # Process cell paragraphs
477 |                         paragraphs = cell.findall('paragraph')
478 |                         
479 |                         # Use existing paragraph if possible
480 |                         cell_paragraph = table_cell.paragraphs[0] if table_cell.paragraphs else table_cell.add_paragraph()
481 |                         
482 |                         for para_idx, para in enumerate(paragraphs):
483 |                             # Add a new paragraph for subsequent paragraphs
484 |                             if para_idx > 0:
485 |                                 cell_paragraph = table_cell.add_paragraph()
486 |                                 
487 |                             # Set paragraph alignment
488 |                             alignment = para.get('Alignment', '0')
489 |                             cell_paragraph.alignment = get_alignment_style(alignment)
490 |                             
491 |                             # Set paragraph indentation
492 |                             left_indent = para.get('LeftIndent')
493 |                             right_indent = para.get('RightIndent')
494 |                             
495 |                             if left_indent:
496 |                                 cell_paragraph.paragraph_format.left_indent = Pt(float(left_indent))
497 |                             if right_indent:
498 |                                 cell_paragraph.paragraph_format.right_indent = Pt(float(right_indent))
499 |                             
500 |                             # Process paragraph content
501 |                             for child in para:
502 |                                 if child.tag == 'content':
503 |                                     # Get and format the text
504 |                                     start_offset = int(child.get('startOffset', '0'))
505 |                                     length = int(child.get('length', '0'))
506 |                                     text = content_text[start_offset:start_offset+length]
507 | 
508 |                                     run = cell_paragraph.add_run(text)
509 | 
510 |                                     # Always use DejaVuSerif as font
511 |                                     run.font.name = "DejaVuSerif"
512 | 
513 |                                     # Set the font size
514 |                                     size = child.get('size')
515 |                                     if size:
516 |                                         run.font.size = Pt(float(size))
517 | 
518 |                                     # Set text formatting
519 |                                     bold = child.get('bold', 'false') == 'true'
520 |                                     italic = child.get('italic', 'false') == 'true'
521 |                                     underline = child.get('underline', 'false') == 'true'
522 |                                     
523 |                                     run.bold = bold
524 |                                     run.italic = italic
525 |                                     if underline:
526 |                                         run.underline = WD_UNDERLINE.SINGLE
527 | 
528 |                                     # Set the color
529 |                                     foreground_result = convert_color(child.get('foreground'))
530 |                                     if foreground_result:
531 |                                         # Extract just the RGBColor object, not the tuple
532 |                                         foreground = foreground_result[0]
533 |                                         run.font.color.rgb = foreground
534 | 
535 |                                 elif child.tag == 'field':
536 |                                     # Process field element
537 |                                     field_name = child.get('fieldName', '')
538 |                                     
539 |                                     # Get the text from the content buffer
540 |                                     if child.get('startOffset') and child.get('length'):
541 |                                         start_offset = int(child.get('startOffset', '0'))
542 |                                         length = int(child.get('length', '0'))
543 |                                         field_text = content_text[start_offset:start_offset+length]
544 |                                     else:
545 |                                         # Use the fieldName as fallback
546 |                                         field_text = field_name
547 |                                     
548 |                                     run = cell_paragraph.add_run(field_text)
549 |                                     
550 |                                     # Always use DejaVuSerif as font
551 |                                     run.font.name = "DejaVuSerif"
552 |                                     
553 |                                     # Set formatting
554 |                                     run.bold = child.get('bold', 'false') == 'true'
555 |                                     run.italic = child.get('italic', 'false') == 'true'
556 |                                     if child.get('underline', 'false') == 'true':
557 |                                         run.underline = WD_UNDERLINE.SINGLE
558 |                                     
559 |                                     # Set color if available
560 |                                     foreground_result = convert_color(child.get('foreground'))
561 |                                     if foreground_result:
562 |                                         # Extract just the RGBColor object, not the tuple
563 |                                         foreground = foreground_result[0]
564 |                                         run.font.color.rgb = foreground
565 |                                         
566 |                                 elif child.tag == 'space':
567 |                                     # Add a space
568 |                                     cell_paragraph.add_run(" ")
569 |                                 elif child.tag == 'image':
570 |                                     # Add an image
571 |                                     image_data = child.get('imageData')
572 |                                     if image_data:
573 |                                         try:
574 |                                             image_bytes = base64.b64decode(image_data)
575 |                                             image_stream = io.BytesIO(image_bytes)
576 |                                             run = cell_paragraph.add_run()
577 |                                             run.add_picture(image_stream)
578 |                                         except Exception as e:
579 |                                             print(f"Error processing image in table: {e}")
580 |                                             cell_paragraph.add_run("[GÖRSEL]")
581 |     else:
582 |         print("'elements' could not be found in the XML.")
583 |         exit()
584 | 
585 |     # Save the document
586 |     document.save(docx_file)
587 |     print(f"DOCX file created: {docx_file}")
588 | 
def main():
    """Command-line entry point: convert the UDF file named in ``sys.argv[1]``.

    The output path is the input path with its extension replaced by
    ``.docx``; the conversion itself is delegated to ``udf_to_docx``.

    Raises:
        SystemExit: with status 1 when no input path is given, when the path
            is not an existing file, or when its extension is not ``.udf``
            (compared case-insensitively).
    """
    if len(sys.argv) < 2:
        print("Usage: python udf_to_docx.py input.udf")
        # Use sys.exit rather than the site-injected exit() helper, and a
        # non-zero status so wrapper scripts can detect the failure.
        sys.exit(1)

    udf_file = sys.argv[1]

    if not os.path.isfile(udf_file):
        print(f"Input file not found: {udf_file}")
        sys.exit(1)

    filename, ext = os.path.splitext(udf_file)

    # Reject anything that is not a .udf file before attempting a conversion.
    if ext.lower() != '.udf':
        print("Please provide a .udf file.")
        sys.exit(1)

    udf_to_docx(udf_file, filename + '.docx')
# Run the converter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------