├── .gitignore
├── DataConverter.py
├── DataConverter.sublime-commands
├── DataConverter.sublime-settings
├── Main.sublime-menu
├── README.md
├── messages.json
└── messages
├── 1.1.0.txt
├── 1.1.2.txt
├── 1.1.3.txt
└── 1.2.txt
/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fitnr/SublimeDataConverter/8f25477f74d236670f9c56b6f732e728c624e334/.gitignore
--------------------------------------------------------------------------------
/DataConverter.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import json
3 | import os
4 | import re
5 | import unicodedata
6 | from itertools import chain, zip_longest
7 | from pprint import pformat
8 |
9 | import _csv
10 | import sublime
11 | import sublime_plugin
12 |
13 | try:
14 | import io
15 | except ImportError as e:
16 | import StringIO as io
17 |
18 |
19 | """
20 | DataConverter package for Sublime Text
21 | https://github.com/fitnr/SublimeDataConverter
22 |
23 | Freely adapted from Mr. Data Converter: http://shancarter.com/data_converter/
24 | """
# Maps a line-ending name to the newline sequence written into converted
# output. get_settings() looks up self.view.line_endings() in this table and
# falls back to os.linesep when the name is not listed.
LINEENDINGS = {
    "Unix": "\n",
    "Windows": "\r\n",
    "CR": "\r",
    "LF": "\n",
}
31 |
32 | # Borrowed from Apply Syntax
33 |
34 |
def sublime_format_path(pth):
    """Return *pth* written with forward slashes only.

    On Windows a leading drive specifier (``C:/`` or ``C:`` followed by a
    backslash) is rewritten to ``C/`` before the remaining backslashes are
    converted.
    """
    drive = re.match(r"^([A-Za-z]{1}):(?:/|\\)(.*)", pth)
    on_windows = sublime.platform() == "windows"
    if on_windows and drive is not None:
        letter, rest = drive.groups()
        pth = letter + "/" + rest
    return pth.replace("\\", "/")
40 |
41 |
def get_type(datum):
    """Guess which Python type best represents *datum*.

    Returns ``int`` or ``float`` when that constructor accepts the value,
    ``str`` when neither does, and ``NoneType`` when ``int()`` rejects the
    value's type outright (for example ``None``).
    """
    try:
        int(datum)
    except TypeError:
        return type(None)
    except ValueError:
        pass
    else:
        return int

    try:
        float(datum)
    except ValueError:
        return str
    return float
56 |
57 |
def parse_types(reader):
    """Guess one type per column from at most the first ten rows of *reader*.

    A column is ``str`` if any sampled cell looked like a string, ``float``
    if any looked like a float, and ``int`` otherwise. Sampling stops early
    when the reader is exhausted or raises while producing a row.
    """
    sampled = []
    for _ in range(10):
        try:
            cells = next(reader)
        except StopIteration:
            break
        except Exception as e:
            print("DataConverter: Error parsing", e)
            break
        sampled.append([get_type(cell) for cell in cells])

    def widest(column):
        # The most permissive type seen in the column wins.
        if str in column:
            return str
        if float in column:
            return float
        return int

    # zip(*sampled) turns the per-row lists into per-column tuples.
    return [widest(column) for column in zip(*sampled)]
87 |
88 |
def set_dialect(dialectname, user_dialects):
    """Return the name of a usable CSV dialect, registering it if necessary.

    Args:
        dialectname (str): name of a dialect already known to the csv module,
            or a key of ``user_dialects``.
        user_dialects (dict): maps dialect names to dicts of csv formatting
            parameters. ``quoting`` is given as the name of a csv constant
            (e.g. "QUOTE_ALL"); unknown names fall back to QUOTE_MINIMAL.

    Returns:
        ``dialectname`` when the dialect is available, otherwise ``None``.
    """
    try:
        csv.get_dialect(dialectname)
        return dialectname
    except csv.Error:
        # Not registered yet; try to build it from the user's definitions.
        pass

    try:
        # Copy first so popping "quoting" does not alter the caller's dict.
        params = dict(user_dialects[dialectname])
        user_quoting = params.pop("quoting", "QUOTE_MINIMAL")
        quoting = getattr(csv, user_quoting, csv.QUOTE_MINIMAL)
        csv.register_dialect(dialectname, quoting=quoting, **params)
    except (csv.Error, KeyError, TypeError):
        # KeyError: no definition under that name.
        # TypeError: the csv module rejected a parameter value.
        print("DataConverter: Couldn't register custom dialect named", dialectname)
        return None

    print("DataConverter: Using custom dialect", dialectname)
    return dialectname
111 |
112 |
def sniff(sample):
    """Detect the CSV dialect of *sample* and register it as "sniffed".

    Returns the dialect name to use: "sniffed" on success, or "excel" when
    the csv module raises an error while sniffing or registering.
    """
    sniffer = csv.Sniffer()
    try:
        detected = sniffer.sniff(sample)
        csv.register_dialect("sniffed", detected)
        print("DataConverter: using sniffed dialect with delimiter:", detected.delimiter)
    except _csv.Error:
        return "excel"
    return "sniffed"
122 |
123 |
124 | def _mysql_type(t):
125 | if t == str:
126 | return "VARCHAR(255)"
127 | elif t == float:
128 | return "FLOAT"
129 | elif t == int:
130 | return "INT"
131 | else:
132 | return "TEXT"
133 |
134 |
135 | def _sqlite_type(t):
136 | if t == float:
137 | return "REAL"
138 | elif t == int:
139 | return "INTEGER"
140 | else:
141 | return "TEXT"
142 |
143 |
144 | def _postgres_type(t):
145 | if t == float:
146 | return "numeric"
147 | elif t == int:
148 | return "integer"
149 | else:
150 | return "text"
151 |
152 |
153 | def _escape(string):
154 | """Escape &, < and >"""
155 | return string.replace("<", "<").replace(">", ">")
156 |
157 |
158 | def _length(x):
159 | try:
160 | return len(str(x))
161 | except TypeError:
162 | return 0
163 |
164 |
165 | def _cast(value, typ_):
166 | try:
167 | return typ_(value)
168 | except TypeError:
169 | return value
170 |
171 |
172 | def _countcombining(string):
173 | """Count combining diacretics in a string."""
174 | return sum(unicodedata.combining(c) > 0 for c in string)
175 |
176 |
177 | def _countwide(string):
178 | """Count the numer of wide characters in a string."""
179 | return sum(unicodedata.east_asian_width(char) == "W" for char in string)
180 |
181 |
182 | # Adding a format? Check if it belongs in no_space_formats or untyped_formats.
183 |
184 |
class DataConverterCommand(sublime_plugin.TextCommand):
    """Text command that rewrites selected delimited text in another format.

    The ``format`` argument passed to ``run`` names the converter method to
    call on this class.
    """

    # This will be set later on, in the converter function
    syntax = None
    # Replaced per invocation by run() with the dict from get_settings().
    settings = dict()
    # Characters used by the _escape method and type_loop when quoting strings.
    escapechar = "\\"
    quotechar = "'"
    # Replaced per selection by run() with the result of assign_headers().
    headers = []
    converter = None

    # These formats can't have spaces in field names. Spaces in headers are
    # replaced with the "header_joiner" setting (see format_headers).
    # "postgres" is listed alongside the other SQL converters because it also
    # emits the headers as unquoted column names.
    no_space_formats = (
        "actionscript",
        "javascript",
        "mysql",
        "postgres",
        "sqlite",
        "xml",
        "xml_properties",
        "yaml",
    )

    # These formats don't need to be checked for int/str/etc types.
    untyped_formats = (
        "dsv",
        "gherkin",
        "html",
        "jira",
        "json",
        "json_columns",
        "json_rows",
        "json_keyed",
        "text_table",
        "wiki",
        "xml",
        "xml_properties",
        "yaml",
    )
222 |
def run(self, edit, **kwargs):
    """Convert every selection (or the whole view) to the requested format.

    Args:
        edit: edit token, passed on to ``self.view.replace``.
        **kwargs: must contain ``format``, the name of a converter method on
            this class; the whole dict is also handed to ``get_settings``.
    """
    try:
        # The format key in .sublime-commands must match the name of the function we want to call.
        self.converter = getattr(self, kwargs["format"])

    except KeyError:
        print("DataConverter: no format given")
        return

    except AttributeError:
        print("DataConverter: unknown format", kwargs["format"])
        return

    try:
        self.settings = self.get_settings(kwargs)

    except TypeError as e:
        print("DataConverter: TypeError fetching settings", e)
        return

    # If nothing is selected, select all, and remember that the selection is
    # ours so it can be removed once the conversion is done.
    deselect_flag = False
    if self.view.sel()[0].empty():
        self.view.sel().add(sublime.Region(0, self.view.size()))
        deselect_flag = True

    for sel in self.view.sel():
        selection = self.view.substr(sel)
        sample = selection[:2048]

        # CSV dialect
        # Sniff if we haven't done this before, or we sniffed before.
        if "dialect" not in self.settings or self.settings["dialect"] == "sniffed":
            self.settings["dialect"] = sniff(sample)
            print("DataConverter: using dialect", self.settings["dialect"])

        self.headers = self.assign_headers(sample)
        data = self.import_csv(selection)

        if self.settings["typed"]:
            # One guessed type per column, in header order.
            self.settings["types"] = self.get_types(selection)
            print(
                "DataConverter found these fields and types:",
                self.settings["types"],
            )

        # Run converter
        converted = self.converter(data)
        self.view.replace(edit, sel, converted)

    if self.syntax is not None:
        self.view.assign_syntax(self.syntax)

    if deselect_flag or self.settings.get("deselect_after"):
        self.deselect()
275 |
def get_settings(self, kwargs):
    """Build the settings dict for one conversion.

    Values come from DataConverter.sublime-settings, from the current view
    (line endings, indentation) and from the command arguments in *kwargs*.
    The "dialect" key is only present when "use_dialect" is set.
    """
    user_settings = sublime.load_settings("DataConverter.sublime-settings")
    view_settings = self.view.settings()
    fmt = kwargs["format"]

    # Indent with spaces or a tab, following the view's own configuration.
    if view_settings.get("translate_tabs_to_spaces"):
        indent = " " * int(view_settings.get("tab_size", 4))
    else:
        indent = "\t"

    quoting_name = user_settings.get("quoting", "QUOTE_MINIMAL")

    settings = {
        # True, "sniff" or "never"
        "headers": user_settings.get("headers"),
        # Formats that cannot have spaces in field names get merged headers.
        "mergeheaders": fmt in self.no_space_formats,
        # Everything not explicitly untyped has its column types guessed.
        "typed": fmt not in self.untyped_formats,
        "newline": LINEENDINGS.get(self.view.line_endings(), os.linesep),
        "quoting": getattr(csv, quoting_name, csv.QUOTE_MINIMAL),
        "strip_quotes": user_settings.get("strip_quotes", True),
        "indent": indent,
        "header_joiner": user_settings.get("header_joiner", ""),
        "html_utf8": user_settings.get("html_utf8", True),
    }

    dialect_name = user_settings.get("use_dialect", False)
    if dialect_name:
        settings["dialect"] = set_dialect(
            dialect_name, user_settings.get("dialects", {})
        )

    settings["default_variable"] = user_settings.get(
        "default_variable", "DataConverter"
    )

    # These settings are solely for DSV converter.
    settings["output_delimiter"] = kwargs.get("output_delimiter")
    settings["output_dialect"] = kwargs.get("output_dialect")

    return settings
332 |
def assign_headers(self, sample):
    """Work out the header names and record whether a header row exists.

    Sets ``self.settings["has_header"]`` from the "headers" setting: ``True``
    forces a header row, ``False`` or "never" forbids one, and any other
    value lets ``csv.Sniffer`` decide (a header is assumed if sniffing fails).

    Args:
        sample (str): leading portion of the text being converted.

    Returns:
        list: the first row's values, or generated names ``val1``, ``val2``,
        ... when there is no header row, passed through ``format_headers``.
    """
    # Read the first row whatever the header setting is, because its length
    # gives the number of columns. An empty sample has no rows, so default to
    # an empty list instead of letting StopIteration escape.
    first_rows = csv.reader(io.StringIO(sample), dialect=self.settings["dialect"])
    headers = next(first_rows, [])

    wanted = self.settings["headers"]

    if wanted is True:
        self.settings["has_header"] = True

    elif wanted == "never" or wanted is False:
        self.settings["has_header"] = False

    else:
        # If not told to definitely try to use headers or definitely not, we sniff for them.
        # Sniffing isn't perfect, especially with short data sets and strange delimiters
        try:
            sniffed_headers = csv.Sniffer().has_header(sample)

            if sniffed_headers:
                self.settings["has_header"] = True
                print("DataConverter: CSV Sniffer found headers")
            else:
                self.settings["has_header"] = False
                print("DataConverter: CSV Sniffer didn't find headers")

        except _csv.Error:
            print(
                "DataConverter: CSV module had trouble sniffing for headers. Assuming they exist."
            )
            print(
                'DataConverter: Add "headers": false to your settings file to assume no headers.'
            )
            self.settings["has_header"] = True

    # Using ['val1', 'val2', ...] if 'headers=never' or Sniffer says there aren't headers
    if self.settings.get("has_header") is False:
        headers = ["val{}".format(x) for x in range(1, 1 + len(headers))]

    return self.format_headers(headers)
373 |
def format_headers(self, headers):
    """Replace spaces in header names when the "mergeheaders" setting is True.

    The replacement text is the "header_joiner" setting ("_" if unset).
    Otherwise *headers* is returned unchanged.
    """
    if self.settings.get("mergeheaders", False) is not True:
        return headers

    joiner = self.settings.get("header_joiner", "_")
    return [name.replace(" ", joiner) for name in headers]
381 |
def import_csv(self, selection):
    """Return a csv reader over the data rows of *selection*.

    When ``self.settings["has_header"]`` is True the first parsed row is
    skipped, so the reader yields data rows only.

    Args:
        selection (str): the delimited text to parse.

    Returns:
        A ``csv.reader`` using the dialect in ``self.settings["dialect"]``.
    """
    reader = csv.reader(io.StringIO(selection), dialect=self.settings["dialect"])

    if self.settings.get("has_header", False) is True:
        # Let the csv parser consume the header row rather than slicing the
        # text at the first newline: slicing keeps only the last character
        # when the configured newline sequence is not found, and splits a
        # quoted header field that contains a line break. The default stops
        # an empty selection from raising StopIteration.
        next(reader, None)

    return reader
396 |
def get_types(self, selection):
    """Guess the column types of the data rows in *selection*.

    Args:
        selection (str): the delimited text to inspect.

    Returns:
        list: one type per column as produced by ``parse_types``, or an empty
        list when the "typed" setting is False.
    """
    # If untyped, return empty list.
    if self.settings.get("typed", False) is False:
        return []

    typer = csv.reader(io.StringIO(selection), dialect=self.settings["dialect"])

    if self.settings.get("has_header", False) is True:
        # Skip the header through the parser instead of slicing the text at
        # the first newline, which mis-slices when that sequence is absent;
        # the default avoids StopIteration on an empty selection.
        next(typer, None)

    return list(parse_types(typer))
410 |
def deselect(self):
    """Collapse the selection to a caret at the start of the first region (adapted from https://gist.github.com/1608283)."""
    selections = self.view.sel()
    start = selections[0].a
    selections.clear()
    selections.add(sublime.Region(start, start))
416 |
def set_syntax(self, path, file_name=False):
    """Assign a syntax from ``Packages/<path>/`` to the view.

    *file_name* defaults to *path*. The ``.sublime-syntax`` resource is tried
    first and ``.tmLanguage`` second; failures to load are only printed.
    Nothing happens when the view already uses the requested syntax.
    """
    name = file_name if file_name else path
    parts = ("Packages", path, name + ".sublime-syntax")
    new_syntax = sublime_format_path("/".join(parts))
    current_syntax = self.view.settings().get("syntax")

    if new_syntax == current_syntax:
        return

    try:
        sublime.load_resource(new_syntax)
        self.view.assign_syntax(new_syntax)
        return
    except OSError as err:
        print("DataConverter: Unable to set syntax ({}).".format(err))

    # Fall back to the older syntax-definition file format.
    fallback = new_syntax.replace(".sublime-syntax", ".tmLanguage")
    try:
        sublime.load_resource(fallback)
        self.view.assign_syntax(fallback)
    except OSError as err:
        print("DataConverter: Unable to set syntax ({}).".format(err))
        print("self.view.syntax().path: {}".format(self.view.syntax().path))
440 |
def _escape(self, string):
    """Return *string* with every quote character preceded by the escape character.

    A falsy *string* (such as None) is treated as the empty string.
    """
    text = string if string else ""
    escaped_quote = self.escapechar + self.quotechar
    return text.replace(self.quotechar, escaped_quote)
444 |
def type_loop(self, row, field_format, field_break=None, null=None):
    """
    Render one row as text, formatting each value according to its column type.
    Strings get quoted and escaped, other types are cast and left unquoted.

    Args:
        row (list): returned from csv.reader
        field_format (str): format for a single field; may use {field},
            {value} and {q} (the quote character)
        field_break (str): break between fields (default: ', ').
        null (str): Text to use for None values (default: 'null').

    Returns:
        str: the formatted fields joined with `field_break`
    """
    separator = field_break if field_break else ", "
    null_text = null if null else "null"
    column_types = self.settings.get("types", [])

    def render(cell, kind):
        # Missing cells (row shorter than the headers) become the null text.
        if cell is None:
            return null_text
        if kind == str:
            return "{q}{}{q}".format(self._escape(cell), q=self.quotechar)
        return _cast(cell, kind)

    parts = []
    for cell, name, kind in zip_longest(row, self.headers, column_types):
        parts.append(
            field_format.format(field=name, value=render(cell, kind), q=self.quotechar)
        )
    return separator.join(parts)
476 |
477 | # Converters
478 | # Note that converters should call self.set_syntax
479 |
def actionscript(self, data):
    """Convert rows to an ActionScript array of objects."""
    self.set_syntax("ActionScript")
    lead = self.settings["newline"] + self.settings["indent"]
    objects = [
        self.type_loop(row, "{field}: {value}", field_break=", ") for row in data
    ]
    body = ("}," + lead + "{").join(objects)
    return "".join(("[", lead, "{", body, "}", self.settings["newline"], "];"))
489 |
490 | # ASP / VBScript
def asp(self, data):
    """Convert rows to ASP/VBScript assignments into a two-dimensional array.

    The output starts with a comment listing the column names and a ``Dim``
    line, followed by one assignment per cell.
    """
    self.set_syntax("ASP")
    cell = self.settings["default_variable"] + "({c},{r})"
    assignments = []
    widest, last_row = 0, 0

    for last_row, row in enumerate(data):
        typed_cells = zip_longest(row, self.settings["types"])
        for col, (value, typ) in enumerate(typed_cells):
            # Columns without a known type are guessed from the value itself.
            if (typ or get_type(value)) == str:
                template = cell + " = " + "{q}{}{q}"
            else:
                template = cell + " = " + "{}"
            escaped = self._escape(value or "null")
            assignments.append(
                template.format(escaped, c=col, r=last_row, q=self.quotechar)
            )

        widest = max(widest, len(row))

    dim = "Dim " + cell.format(c=widest, r=last_row, n="")
    names = '\' columnNames = Array("{}")'.format('", "'.join(self.headers))
    lines = [names, dim] + assignments
    return self.settings["newline"].join(lines) + self.settings["newline"]
515 |
def _spaced_text(self, data, delimiter, row_decoration=None, **kwargs):
    """
    Shared converter for table formats whose columns are padded to equal width.

    Args:
        data (csv.reader): Sequence of lists
        delimiter (str): division between each field
        row_decoration (function): A function that takes the sequence of
                                   column widths and returns a str used to
                                   decorate the top, bottom, and/or the gap
                                   between header and rows.
        field_format (str): format str for each field. default: ' {: <{fill}} '
        top (bool): Add the row decoration to the top of the output.
        between (bool): Add row decoration between the header and the row.
        bottom (bool): Add row decoration after the output.
    """
    field_format = kwargs.get("field_format", " {: <{fill}} ")

    # The rows are walked twice (measure, then render), so materialise them.
    rows = list(data)

    # Column widths start from the headers and grow to fit every cell.
    widths = [len(name) for name in self.headers]
    for row in rows:
        cell_widths = [
            len(unicodedata.normalize("NFKC", cell)) + _countwide(cell)
            for cell in row
        ]
        widths = [
            max(pair) for pair in zip_longest(widths, cell_widths, fillvalue=0)
        ]

    def render(row):
        """Return one output line for *row*, padded to the column widths."""
        cells = (
            # Account for fullwidth ideographs and uncombined combining diacritics.
            field_format.format(
                cell, fill=width + _countcombining(cell) - _countwide(cell)
            )
            for cell, width in zip_longest(row, widths, fillvalue="")
        )
        return delimiter + delimiter.join(cells) + delimiter

    # Optional decoration line, shared by top, between and bottom.
    decoration = row_decoration(widths) if row_decoration else ""

    lines = []
    if self.settings.get("has_header", False):
        if kwargs.get("top"):
            lines.append(decoration)
        lines.append(render(self.headers))
        if kwargs.get("between"):
            lines.append(decoration)

    lines.extend(render(row) for row in rows)

    if kwargs.get("bottom"):
        lines.append(decoration)

    return self.settings["newline"].join(lines) + self.settings["newline"]
585 |
def dsv(self, data):
    """
    Re-write the rows as delimited text using the output dialect/delimiter.
    Useful for changing one delimited format into another.
    """
    self.set_syntax("Plain Text")

    options = self.settings
    buffer = io.StringIO()
    writer = csv.writer(
        buffer,
        dialect=options.get("output_dialect"),
        delimiter=options.get("output_delimiter"),
        lineterminator=options.get("newline", os.linesep),
    )
    # Headers are written unless has_header is explicitly False.
    if options.get("has_header") is not False:
        writer.writerow(self.headers)
    writer.writerows(data)
    return buffer.getvalue()
605 |
606 | def html(self, data):
607 | """HTML Table converter."""
608 | # Uses {i} and {n} as shorthand for self.settings['indent'] and self.settings['newline'].
609 | self.set_syntax("HTML")
610 | thead, tbody = "", ""
611 |
612 | tr = "{{i}}{{i}}
{{n}}{0}{{i}}{{i}}
"
613 |
614 | # Render the table head, if there is one
615 | if self.settings.get("has_header") is True:
616 | th = (
617 | "{i}{i}{i}"
618 | + (" | {n}{i}{i}{i}".join(self.headers))
619 | + " | {n}"
620 | )
621 | thead = "{i}{n}" + tr.format(th) + "{n}{i}{n}"
622 |
623 | else:
624 | thead = ""
625 |
626 | # Render table rows
627 | tbody = "{n}".join(
628 | tr.format(
629 | "{n}".join("{i}{i}{i}" + self._escape(r) + " | " for r in row)
630 | + "{n}"
631 | )
632 | for row in data
633 | )
634 |
635 | table = (
636 | "{n}" + thead + "{i}{n}" + tbody + "{n}{i}{n}
"
637 | ).format(i=self.settings["indent"], n=self.settings["newline"])
638 |
639 | if self.settings["html_utf8"]:
640 | return table
641 | else:
642 | return table.encode("ascii", "xmlcharrefreplace").decode("ascii")
643 |
def gherkin(self, data):
    """Convert rows to a pipe-delimited Cucumber/Gherkin table."""
    self.set_syntax("Cucumber", "Cucumber Steps")
    table = self._spaced_text(data, "|")
    return table
648 |
def javascript(self, data):
    """Convert rows to a JavaScript array of object literals."""
    self.set_syntax("JavaScript")
    newline = self.settings["newline"]
    indent = self.settings["indent"]

    objects = [self.type_loop(r, '"{field}": {value}', ", ") for r in data]
    content = "{" + ("}," + newline + indent + "{").join(objects) + "}"

    return "".join(("[", newline, indent, content, newline, "];"))
669 |
def jira(self, data):
    """Convert rows to a Jira wiki-markup table.

    The header line uses ``||`` around every name; each data line uses a
    single ``|`` around every value. Rows shorter than the header are padded
    with empty cells; values beyond the header width are dropped.

    Args:
        data: iterable of rows (lists of str).

    Returns:
        str: the table, terminated by the configured newline.
    """
    newline = self.settings["newline"]
    width = len(self.headers)
    head = "||" + ("||").join(self.headers) + "||" + newline

    # One leading pipe, then "value|" per column, e.g. "|{}|{}|". A doubled
    # leading pipe would mark the first cell of every row as a heading.
    fmt = "|" + ("{}|" * width)
    print("DataConverter: Formatting JIRA row with", fmt)

    content = newline.join(
        # Pad short rows so str.format never runs out of positional values.
        fmt.format(*(list(r) + [""] * (width - len(r))))
        for r in data
    )

    return head + content + newline
678 |
def json(self, data):
    """Convert rows to a JSON array of objects keyed by header name."""
    self.set_syntax("JSON")
    records = []
    for row in data:
        records.append(dict(zip(self.headers, row)))
    indent_width = len(self.settings["indent"])
    return json.dumps(records, indent=indent_width, ensure_ascii=False)
687 |
def json_columns(self, data):
    """Convert rows to a JSON array with one inner array per column."""
    self.set_syntax("JSON")
    # zip_longest transposes rows into columns, padding short rows with None.
    columns = list(zip_longest(*data))
    indent_width = len(self.settings["indent"])
    return json.dumps(columns, indent=indent_width, separators=(",", ":"))
696 |
def json_rows(self, data):
    """Convert rows to a JSON array with one inner array per row."""
    self.set_syntax("JSON")
    rows = list(data)
    indent_width = len(self.settings["indent"])
    return json.dumps(rows, indent=indent_width, separators=(",", ":"))
703 |
def json_keyed(self, data):
    """Convert rows to a JSON object keyed by each row's first value.

    Raises:
        IndexError: when a row has no first value (an empty row).
    """
    self.set_syntax("JSON")
    keydict = {}
    try:
        for row in data:
            key = self._escape(row[0])
            keydict[key] = dict(zip_longest(self.headers, row))
    except IndexError:
        raise IndexError(
            "Problem converting to dictionary. Check that there are no empty rows."
        )

    indent_width = len(self.settings["indent"])
    return json.dumps(keydict, indent=indent_width, separators=(",", ":"))
720 |
def markdown(self, data):
    """Convert rows to a Markdown table with a dashed rule under the header."""
    self.set_syntax("Text", "Markdown")

    def decorate(lengths):
        # One run of dashes per column, padded by a space on each side.
        rules = [" {} ".format("-" * width) for width in lengths]
        return "|" + "|".join(rules) + "|"

    return self._spaced_text(data, "|", decorate, between=True)
730 |
def mysql(self, data):
    """Convert rows to MySQL CREATE TABLE and INSERT statements."""
    columns = []
    for name, kind in zip(self.headers, self.settings["types"]):
        columns.append(name + " " + _mysql_type(kind))

    # {table}, {n} and {i} are filled in later by _sql.
    create = (
        "CREATE TABLE IF NOT EXISTS {table} ("
        "{n}{i}id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,{n}"
        "{i}" + ",{n}{i}".join(columns) + "{n});"
    )
    return self._sql(data, create)
746 |
def perl(self, data):
    """Convert rows to a Perl array of hashes.

    Missing values are written as ``undef``.

    Args:
        data: iterable of rows (lists of str).

    Returns:
        str: the Perl source text.
    """
    self.set_syntax("Perl")
    n = self.settings["newline"]
    i = self.settings["indent"]

    # Assemble with plain concatenation. Running str.format over the finished
    # text would treat any "{" or "}" inside a cell as a format field.
    output = ("}," + n + i + "{").join(
        self.type_loop(r, "{q}{field}{q}=>{value}", null="undef") for r in data
    )
    return "[" + n + i + "{" + output + "}" + n + "];"
756 |
def _php(self, data, array_open, array_close):
    """Shared PHP converter; the array delimiters are supplied by the caller."""
    self.set_syntax("PHP")
    newline = self.settings["newline"]

    rows = [
        self.settings["indent"]
        + array_open
        + self.type_loop(row, "{q}{field}{q}=>{value}")
        + array_close
        for row in data
    ]
    body = ("," + newline).join(rows)

    return "".join((array_open, newline, body, newline, array_close, ";"))
775 |
def php4(self, data):
    """Convert rows to PHP using the ``array( ... )`` syntax."""
    opener, closer = "array(", ")"
    return self._php(data, opener, closer)
779 |
def php54(self, data):
    """Convert rows to PHP using the short ``[ ... ]`` array syntax."""
    opener, closer = "[", "]"
    return self._php(data, opener, closer)
783 |
def postgres(self, data):
    """Convert rows to PostgreSQL CREATE TABLE and INSERT statements."""
    columns = []
    for name, kind in zip(self.headers, self.settings["types"]):
        columns.append(name + " " + _postgres_type(kind))

    # {table}, {n} and {i} are filled in later by _sql.
    create = (
        "CREATE TABLE IF NOT EXISTS {table} ({n}"
        "{i}id serial,{n}"
        "{i}" + ",{n}{i}".join(columns) + "{n});"
    )
    return self._sql(data, create)
796 |
def python_dict(self, data):
    """Convert rows to a pretty-printed Python list of dicts."""
    self.set_syntax("Python")

    column_types = self.settings["types"]
    records = []
    for row in data:
        record = {}
        for key, value, kind in zip_longest(self.headers, row, column_types):
            record[key] = _cast(value, kind)
        records.append(record)

    return pformat(records)
809 |
def python_list(self, data):
    """Convert rows to a pretty-printed Python list of lists.

    The headers are emitted on a leading comment line.
    """
    self.set_syntax("Python")

    column_types = self.settings["types"]
    rows = []
    for row in data:
        rows.append(
            [_cast(value, kind) for value, kind in zip_longest(row, column_types)]
        )

    return "# headers = {}{n}{}".format(
        self.headers, pformat(rows), n=self.settings["newline"]
    )
820 |
def ruby(self, data):
    """Convert rows to a Ruby array of hashes.

    Missing values are written as ``nil``.

    Args:
        data: iterable of rows (lists of str).

    Returns:
        str: the Ruby source text.
    """
    self.set_syntax("Ruby")
    n = self.settings["newline"]
    i = self.settings["indent"]

    # Assemble with plain concatenation. Running str.format over the finished
    # text would treat any "{" or "}" inside a cell as a format field.
    output = ("}," + n + i + "{").join(
        self.type_loop(row, "{q}{field}{q}=>{value}", null="nil") for row in data
    )
    return "[" + n + i + "{" + output + "}" + n + "];"
831 |
def _sql(self, data, create):
    """General SQL converter, used by MySQL, PostgreSQL, SQLite.

    Args:
        data: iterable of rows (lists of str).
        create (str): CREATE TABLE template; it may use {table}, {n}
            (newline) and {i} (indent), which are filled in here.

    Returns:
        str: the CREATE TABLE statement followed by one INSERT statement
        holding a parenthesised value list per row.
    """
    self.set_syntax("SQL")
    n = self.settings["newline"]
    i = self.settings["indent"]

    # Only the fixed template goes through str.format. Row values are added
    # afterwards by concatenation, so "{" or "}" in a cell is kept verbatim
    # instead of being parsed as a format field.
    preamble = (
        create + "{n}" + "INSERT INTO {table}{n}{i}({names}){n}VALUES{n}"
    ).format(
        create=create,
        table=self.settings["default_variable"],
        names=", ".join(self.headers),
        i=i,
        n=n,
    )

    rows = ("), " .strip() + n + i + "(").join(
        self.type_loop(row, field_format="{value}", null="NULL") for row in data
    )
    return preamble + i + "(" + rows + ");"
851 |
def sqlite(self, data):
    """Convert rows to SQLite CREATE TABLE and INSERT statements."""
    columns = []
    for name, kind in zip(self.headers, self.settings["types"]):
        columns.append(name + " " + _sqlite_type(kind))

    # {table}, {n} and {i} are filled in later by _sql.
    create = (
        "CREATE TABLE IF NOT EXISTS {table} ({n}"
        "{i}id INTEGER PRIMARY KEY ON CONFLICT FAIL AUTOINCREMENT,{n}"
        "{i}" + ",{n}{i}".join(columns) + "{n});"
    )

    return self._sql(data, create)
865 |
def wiki(self, data):
    """Convert rows to a wiki-markup table with class "wikitable"."""
    n = self.settings["newline"]
    row_break = n + "|-" + n + "|"
    header = '{| class="wikitable"' + n + "!" + "!!".join(self.headers)

    rows = [self.type_loop(row, "{value}", "||") for row in data]

    return "".join((header, row_break, row_break.join(rows), n, "|}"))
879 |
880 | def xml(self, data):
881 | """XML Nodes converter"""
882 | self.set_syntax("XML")
883 | elem = "{{i}}{{i}}<{1}>{0}{1}>"
884 | rows = "{n}".join(
885 | "{i}{n}"
886 | + "{n}".join(
887 | elem.format(_escape(value or ""), head)
888 | for head, value in zip(self.headers, row)
889 | )
890 | + "{n}{i}
"
891 | for row in data
892 | )
893 | output = (
894 | '{n}{n}' + rows + "{n}"
895 | )
896 | return output.format(i=self.settings["indent"], n=self.settings["newline"])
897 |
898 | def xml_properties(self, data):
899 | """XML properties converter"""
900 | self.set_syntax("XML")
901 | rows = "{n}".join(
902 | "{i}
"
908 | for row in data
909 | )
910 | output = (
911 | '{n}{n}' + rows + "{n}"
912 | )
913 | return output.format(i=self.settings["indent"], n=self.settings["newline"])
914 |
915 | def xml_illustrator(self, data):
916 | """Convert to Illustrator XML format"""
917 | self.set_syntax("XML")
918 |
919 | output = (
920 | '{n}'
921 | '{n}'
924 | '{i}{n}'
925 | '{i}{n}'
926 | '{i}{n}'
927 | '{i}{n}'
928 | '{i}{n}'
929 | "]>{n}"
930 | "{n}"
982 | )
983 |
984 | return output.format(i=self.settings["indent"], n=self.settings["newline"])
985 |
def text_table(self, data):
    """Convert rows to a boxed plain-text table."""
    self.set_syntax("Text", "Plain Text")

    def decorate(lengths):
        # Each column rule is two dashes wider than the column, for padding.
        rules = ["-" * (width + 2) for width in lengths]
        return "+" + "+".join(rules) + "+"

    return self._spaced_text(
        data, "|", decorate, top=True, between=True, bottom=True
    )
996 |
def yaml(self, data):
    """Convert rows to a YAML sequence with one mapping per row.

    Args:
        data: iterable of rows (lists of str).

    Returns:
        str: the YAML text, starting with ``---``.
    """
    self.set_syntax("YAML")
    n = self.settings["newline"]
    i = self.settings["indent"]

    # Newline and indent are concatenated directly. Running str.format over
    # the finished text would treat "{" or "}" inside a cell as a format field.
    linebreak = n + "-" + n + i
    rows = (self.type_loop(r, "{field}: {value}", n + i) for r in data)
    return "---" + linebreak + linebreak.join(rows) + n
1005 |
--------------------------------------------------------------------------------
/DataConverter.sublime-commands:
--------------------------------------------------------------------------------
1 | [
2 | { "caption": "DataConverter: to ActionScript", "command": "data_converter", "args": {"format": "actionscript" } },
3 | { "caption": "DataConverter: to ASP/VBScript", "command": "data_converter", "args": {"format": "asp" } },
4 | { "caption": "DataConverter: to CSV", "command": "data_converter", "args": {"format": "dsv", "output_delimiter": "," } },
5 | { "caption": "DataConverter: to HTML table", "command": "data_converter", "args": {"format": "html" } },
6 | { "caption": "DataConverter: to Gherkin", "command": "data_converter", "args": {"format": "gherkin" } },
7 | { "caption": "DataConverter: to JavaScript object", "command": "data_converter", "args": {"format": "javascript" } },
8 | { "caption": "DataConverter: to JIRA table", "command": "data_converter", "args": {"format": "jira" } },
9 | { "caption": "DataConverter: to JSON", "command": "data_converter", "args": {"format": "json" } },
10 | { "caption": "DataConverter: to JSON (array of columns)", "command": "data_converter", "args": {"format": "json_columns" } },
11 | { "caption": "DataConverter: to JSON (array of rows)", "command": "data_converter", "args": {"format": "json_rows" } },
12 | { "caption": "DataConverter: to JSON (first column as key)", "command": "data_converter", "args": {"format": "json_keyed" } },
13 | { "caption": "DataConverter: to Markdown (Github-flavored)", "command": "data_converter", "args": {"format": "markdown" } },
14 | { "caption": "DataConverter: to MySQL", "command": "data_converter", "args": {"format": "mysql" } },
15 | { "caption": "DataConverter: to Perl", "command": "data_converter", "args": {"format": "perl" } },
16 | { "caption": "DataConverter: to PHP", "command": "data_converter", "args": {"format": "php4" } },
17 | { "caption": "DataConverter: to PHP 5.4", "command": "data_converter", "args": {"format": "php54" } },
18 | { "caption": "DataConverter: to PostgreSQL", "command": "data_converter", "args": {"format": "postgres" } },
19 | { "caption": "DataConverter: to Python dict", "command": "data_converter", "args": {"format": "python_dict" } },
20 | { "caption": "DataConverter: to Python lists", "command": "data_converter", "args": {"format": "python_list" } },
21 | { "caption": "DataConverter: to Ruby", "command": "data_converter", "args": {"format": "ruby" } },
22 | { "caption": "DataConverter: to SQLite", "command": "data_converter", "args": {"format": "sqlite" } },
23 | { "caption": "DataConverter: to text table", "command": "data_converter", "args": {"format": "text_table" } },
24 | { "caption": "DataConverter: to TSV", "command": "data_converter", "args": {"format": "dsv", "output_delimiter": "\t" } },
25 | { "caption": "DataConverter: to wiki markup", "command": "data_converter", "args": {"format": "wiki" } },
26 | { "caption": "DataConverter: to XML Nodes", "command": "data_converter", "args": {"format": "xml" } },
27 | { "caption": "DataConverter: to XML Properties", "command": "data_converter", "args": {"format": "xml_properties" } },
28 | { "caption": "DataConverter: to XML (Illustrator)", "command": "data_converter", "args": {"format": "xml_illustrator" } },
29 | { "caption": "DataConverter: to YAML", "command": "data_converter", "args": {"format": "yaml" } }
30 | ]
31 |
--------------------------------------------------------------------------------
/DataConverter.sublime-settings:
--------------------------------------------------------------------------------
1 | {
2 | // Should be either true, "sniff" or false
3 | // When true, the first row is always treated as the header row
4 | // When "sniff", DataConverter will sniff for headers (sniffing isn't perfect)
5 | // When false, DataConverter will assume there are no headers, and use default headers ([val1, val2, ...])
6 | "headers": "sniff",
7 |
8 | // Define a dialect for the csv reader
9 | // Check the python docs for a description of how to define a dialect
10 | // https://docs.python.org/3.3/library/csv.html#dialects-and-formatting-parameters
11 | // (Python's csv reader is hard-coded to recognise either '\r' or '\n' as end-of-line, and ignores lineterminator.)
12 | // The below example settings are the defaults in the csv module, and will be used if a dialect can't be detected
13 | // "dialects": {
14 | // "example": {
15 | // "delimiter": ",",
16 | // "quotechar": "\"",
17 | // "escapechar": "\\",
18 | // "doublequote": false,
19 | // "skipinitialspace": false,
20 | // "strict": false,
21 | // "quoting": "QUOTE_MINIMAL"
22 | // }
23 | // },
24 |
25 | // Mandate a dialect for DataConverter to use.
26 | // Could be a dialect defined in your settings file, or one defined in the csv module ('excel', 'excel_tab', 'unix_dialect').
27 | // This may be useful when specifying custom commands in a .sublime-commands file.
28 | // "use_dialect": "example",
29 |
30 | // Modern HTML served with a proper character encoding can accept UTF-8 characters.
31 | // If you're using another charset for your html, set this to false.
32 | // When false, the DataConverter 'to HTML Table' function will escape non-ascii characters (e.g. &ndash; for –)
33 | // (XML is always returned with escaped characters.)
34 | "html_utf8": true,
35 |
36 | // DataConverter will try to detect the delimiter used. If it has a problem, it will fall back on this value.
37 | // This must be one character long. Use "\t" for tab.
38 | "delimiter": ",",
39 |
40 | // For formats where keys can't have spaces, field names will be joined with this character.
41 | // By default, an underscore is used, e.g. 'Col Name' becomes 'Col_Name'
42 | // An empty string is OK
43 | "header_joiner": "_",
44 |
45 | // If true: after converting, deselects and moves the pointer to the top.
46 | // If false: leaves selection(s) in place
47 | "deselect_after": false
48 |
49 | // For some conversions (SQL, ASP), DataConverter must name the table or array being created
50 | // By default, it's called 'DataConverter'; set your own value here
51 | // "default_variable": "DataConverter"
52 | }
--------------------------------------------------------------------------------
/Main.sublime-menu:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "caption": "Preferences",
4 | "mnemonic": "n",
5 | "id": "preferences",
6 | "children":
7 | [
8 | {
9 | "caption": "Package Settings",
10 | "mnemonic": "P",
11 | "id": "package-settings",
12 | "children":
13 | [
14 | {
15 | "caption": "DataConverter",
16 | "children":
17 | [
18 | {
19 | "command": "open_file",
20 | "args": {"file": "${packages}/DataConverter/DataConverter.sublime-settings"},
21 | "caption": "Settings – Default"
22 | },
23 | {
24 | "command": "open_file",
25 | "args": {"file": "${packages}/User/DataConverter.sublime-settings"},
26 | "caption": "Settings – User"
27 | },
28 | { "caption": "-" }
29 | ]
30 | }
31 | ]
32 | }
33 | ]
34 | }
35 | ]
36 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Sublime DataConverter
2 |
3 | This [Sublime Text](http://www.sublimetext.com/) package converts csv files to various other formats. It's been adapted from the wonderful [Mr. Data Converter](https://shancarter.github.io/mr-data-converter/).
4 |
5 | After installing, you'll find commands that look like __DataConverter: to *foo*__ in the Command Palette. DataConverter will convert a selection or multiple selections. If nothing is selected, the entire document is converted.
6 |
7 | ### Examples
8 |
9 | Turn this:
10 |
11 | name,value,fruit,date
12 | Alice,10,Apple,"Sep. 12, 2016"
13 | Bob,11,Blueberry,"Sep. 13, 2016"
14 | Chris,12,Orange,"Sep. 14, 2016"
15 |
16 | into this (Ruby):
17 |
18 | ```ruby
19 | [{"name"=>"Alice", "value"=>10, "fruit"=>"Apple", "date"=>"Sep. 12, 2016"},
20 | {"name"=>"Bob", "value"=>11, "fruit"=>"Blueberry", "date"=>"Sep. 13, 2016"},
21 | {"name"=>"Chris", "value"=>12, "fruit"=>"Orange", "date"=>"Sep. 14, 2016"}];
22 | ```
23 |
24 | or this (JSON):
25 |
26 | ```javascript
27 | [
28 | {"fruit": "Apple", "name": "Alice", "value": "10", "date": "Sep. 12, 2016"},
29 | {"fruit": "Blueberry", "name": "Bob", "value": "11", "date": "Sep. 13, 2016"},
30 | {"fruit": "Orange", "name": "Chris", "value": "12", "date": "Sep. 14, 2016"}
31 | ]
32 | ```
33 |
34 | ### Formats supported
35 |
36 | * ActionScript
37 | * ASP
38 | * HTML tables
39 | * Gherkin
40 | * JIRA (Atlassian Confluence)
41 | * JSON
42 | * JSON (array of columns)
43 | * JSON (array of rows)
44 | * JSON (object, first column is key)
45 | * Javascript object
46 | * Markdown (Github-flavored)
47 | * Perl
48 | * PHP (two formats)
49 | * Python (list of dicts)
50 | * Python (list of lists)
51 | * Ruby
52 | * SQL (Postgres, MySQL and SQLite)
53 | * text table
54 | * Wiki markup
55 | * XML
56 | * XML (property list)
57 | * XML for data-driven Adobe Illustrator
58 | * YAML
59 |
60 | Additionally, DataConverter can convert between delimiters. By default, this includes commands to convert to CSV and TSV, and it's possible to add your own delimiter (create a `User.sublime-commands` file following the pattern in [`DataConverter.sublime-commands`](DataConverter.sublime-commands)).
61 |
62 | ## Installation
63 |
64 | ### With Package Control
65 | If you have [Package Control](http://github.com/wbond/sublime_package_control) installed, you can install DataConverter from within Sublime Text. Open the Command Palette and enter "Package Control: Install Package", then search for __DataConverter__.
66 |
67 | ### Without Package Control
68 | Clone the repository into your Sublime Text packages directory:
69 |
70 | git clone git://github.com/fitnr/SublimeDataConverter.git
71 |
72 | ### Without Package Control or Git
73 | Click `Download Zip` above to download the package. Unzip it, rename the folder "DataConverter" and move it into your Sublime Text 2 packages directory (*Preferences > Browse Packages* in the application menu).
74 |
75 | ## Limitations
76 |
77 | CSVs containing Unicode characters aren't supported in the Sublime Text 2 version of the package. This is due to limitations in the Python 2.6 csv module. Unicode is fully supported in the Sublime Text 3 version of the package.
78 |
79 | ## Problems?
80 |
81 | [Submit an issue](https://github.com/fitnr/SublimeDataConverter/issues).
82 |
83 | ## Contributing
84 |
85 | Pull requests with additional formats are encouraged.
86 |
87 | ## Configuration
88 |
89 | DataConverter reads the following options from your settings file (Preferences > Package Settings > DataConverter > Settings - User).
90 |
91 | #### headers
92 |
93 | Possible values: `"sniff"`, `true`, or `false`.
94 | ````
95 | "headers": "sniff"
96 | ````
97 |
98 | When true, the first row is always treated as the header row. When "sniff", DataConverter will sniff for headers (sniffing isn't perfect). When false, DataConverter will assume there are no headers, and use default headers (`[val1, val2, ...]`).
99 |
100 | #### dialects
101 | Object
102 | ````
103 | "dialects": {
104 | "example": {
105 | "delimiter": ",",
106 | "quotechar": "\"",
107 | "escapechar": "\\",
108 | "doublequote": false,
109 | "skipinitialspace": false,
110 | "strict": false,
111 | "quoting": "QUOTE_MINIMAL"
112 | }
113 | }
114 | ````
115 | Defines a dialect for the CSV reader. Check the python docs for a [description of how to define a dialect](https://docs.python.org/3.3/library/csv.html#dialects-and-formatting-parameters).
116 |
117 | DataConverter will try to detect a dialect, but it may fail. Define a dialect with this setting and then tell DataConverter to use it with the `use_dialect` option.
118 |
119 | Note that Python's csv reader is hard-coded to recognise either '\r' or '\n' as end-of-line, and ignores lineterminator.
120 |
121 | #### use_dialect
122 | String
123 | ````
124 | "use_dialect": "example"
125 | ````
126 | Mandate a dialect for DataConverter to use. Could be a dialect defined in your settings file, or one defined in the csv module ("excel", "excel_tab", "unix_dialect"). This may be useful for specifying custom commands in a `.sublime-commands` file.
127 |
128 | #### html_utf8
129 | Boolean
130 | ````
131 | "html_utf8": true
132 | ````
133 | Modern HTML served with a proper character encoding can accept UTF-8 characters. If you're using another charset for your html, set this to `false`. When `false`, the 'DataConverter: to HTML Table' function will escape non-ascii characters (e.g. `&ndash;` for –). (XML is always returned with escaped characters.)
134 |
135 |
136 | #### delimiter
137 | Character
138 | ````
139 | "delimiter": ","
140 | ````
141 | DataConverter will try to detect the delimiter used in your data. If it has a problem, it will fall back on this value. This must be one character long. Use `"\t"` for tab.
142 |
143 | #### header_joiner
144 | String
145 | ````
146 | "header_joiner": "_"
147 | ````
148 | For formats where keys can't have spaces, field names will be joined with this character. By default, an underscore is used, e.g. 'Col Name' becomes 'Col_Name'. An empty string is OK.
149 |
150 | #### deselect_after
151 | Boolean
152 | ````
153 | "deselect_after": false
154 | ````
155 | If `true`: after converting, deselects and moves the pointer to the top. If `false`: leaves selection(s) in place
156 |
157 | #### default_variable
158 | ````
159 | "default_variable": "DataConverter"
160 | ````
161 | For some conversions (SQL, ASP), DataConverter must name the table or array being created. By default, it's called 'DataConverter'; any string value is accepted.
162 |
163 |
--------------------------------------------------------------------------------
/messages.json:
--------------------------------------------------------------------------------
1 | {
2 | "1.1.0": "messages/1.1.0.txt",
3 | "1.1.2": "messages/1.1.2.txt",
4 | "1.1.3": "messages/1.1.3.txt",
5 | "1.2": "messages/1.2.txt"
6 | }
7 |
--------------------------------------------------------------------------------
/messages/1.1.0.txt:
--------------------------------------------------------------------------------
1 | DataConverter 1.1.0
2 | -------------------
3 |
4 | This update removes the "delimiter" setting, which overlapped with the "dialects" setting.
5 | If you wish to read a specific DSV syntax, the "use_dialect" setting allows you to set a specific dialect.
6 |
7 | Changes
8 | --------
9 | * pretty-printed Python output
10 | * output is now escaped
11 | * better error handling
12 | * Dummy variable no longer used in PHP and JS output
13 | * Corrected HTML indentation
14 |
15 | If you have a problem or there's a format you would like to see in DataConverter, visit https://github.com/fitnr/SublimeDataConverter/issues
16 |
--------------------------------------------------------------------------------
/messages/1.1.2.txt:
--------------------------------------------------------------------------------
1 | DataConverter 1.1.2
2 | -------------------
3 |
4 | Adds an SQLite converter and conversion between arbitrary CSV delimiters. Convert from TSV to CSV and back again!
5 |
6 | If you have a problem or there's a format you would like to see in DataConverter, visit https://github.com/fitnr/SublimeDataConverter/issues
7 |
--------------------------------------------------------------------------------
/messages/1.1.3.txt:
--------------------------------------------------------------------------------
1 | DataConverter 1.1.3
2 | -------------------
3 |
4 | Adds a Github-flavored Markdown table converter.
5 |
6 | If you have a problem or there's a format you would like to see in DataConverter, visit https://github.com/fitnr/SublimeDataConverter/issues
7 |
--------------------------------------------------------------------------------
/messages/1.2.txt:
--------------------------------------------------------------------------------
1 | DataConverter 1.2
2 | -------------------
3 |
4 | Add PostgreSQL converter.
5 |
6 | Improved handling of ideographic (Chinese, Japanese, Korean) characters in spaced text formats.
7 |
8 | Improved handling of CSVs with irregular number of columns in many formats.
9 |
10 | If you have a problem or there's a format you would like to see in DataConverter, visit https://github.com/fitnr/SublimeDataConverter/issues
11 |
--------------------------------------------------------------------------------