changed hex_metadata.config

@@ -8,9 +8,7 @@
  <<"lib/makeup/formatters/html/html_formatter.ex">>,
  <<"lib/makeup/lexer.ex">>,<<"lib/makeup/lexer/common.ex">>,
  <<"lib/makeup/lexer/common/ascii.ex">>,
- <<"lib/makeup/lexer/common/macros.ex">>,
- <<"lib/makeup/lexers/elixir_lexer.ex">>,
- <<"lib/makeup/lexers/html5_lexer.ex">>,<<"lib/makeup/pickers.ex">>,
+ <<"lib/makeup/lexer/common/macros.ex">>,<<"lib/makeup/pickers.ex">>,
  <<"lib/makeup/styles/html.ex">>,
  <<"lib/makeup/styles/html/pygments/abap.ex">>,
  <<"lib/makeup/styles/html/pygments/algol.ex">>,

@@ -68,4 +66,4 @@
  {<<"name">>,<<"html_entities">>},
  {<<"optional">>,false},
  {<<"requirement">>,<<"~> 0.3.0">>}]]}.
- {<<"version">>,<<"0.1.3">>}.
+ {<<"version">>,<<"0.2.0">>}.
changed lib/makeup.ex

@@ -1,4 +1,10 @@
  defmodule Makeup do
+ @moduledoc """
+ Syntax highlighting library for code, inspired by Pygments.
+
+ By default, it doesn't include any lexers.
+ You must import them separately (even the Elixir lexer).
+ """
  alias Makeup.Formatters.HTML.HTMLFormatter
  alias Makeup.Lexers.ElixirLexer
  alias Makeup.Styles.HTML.StyleMap
changed lib/makeup/lexer/common.ex

@@ -127,7 +127,7 @@ defmodule Makeup.Lexer.Common do
  left, right,
  interpol_left, interpol_inner, interpol_right,
  options \\ []) do
- # *Bewhare those who enter, for here lies dark magic!*
+ # *Beware those who enter, for here lies dark magic!*
  #
  # In an act of extreme cultural insensitivity, the names of the arguments for this macro
  # assume a right-to-left writing system.
changed lib/makeup/lexer/common/ascii.ex

@@ -6,7 +6,6 @@ defmodule Makeup.Lexer.Common.ASCII do

  alias Makeup.Lexer.Common.Macros, as: M
  require M
-

  @doc """
  Recognizes an ASCII space (`[?\\s, ?\\t, ?\\n, ?\\r, ?\\f, ?\\v]`).
removed lib/makeup/lexers/elixir_lexer.ex

@@ -1,793 +0,0 @@
- defmodule Makeup.Lexers.ElixirLexer do
- @moduledoc """
- Documentation for the Elixir lexer.
- """
-
- use ExSpirit.Parser, text: true
-
- import Makeup.Lexer.Common
- # This is required due to some speed optimizations in ExSpirit
- require Makeup.Lexer.Common.Macros
-
- require Makeup.Token.TokenTypes
- alias Makeup.Token.TokenTypes, as: Tok
-
- import Makeup.Lexer.Common.ASCII, only: [
- space: 1, spaces1: 1, digits1: 1,
- hex_digits1: 1, lowercase_letter_: 1,
- alphanums_: 1, alphanum_: 1]
-
- # # Actual lexer
-
- # ## Sigils
-
- defrule embed(
- alt([
- # Deprecated form of hexadecimal characters
- token(
- seq([
- lit("\\x"),
- alt([
- # Long form: `\x{H*}` (*deprecated*)
- seq([
- char(?{),
- chars([?0..?9, ?a..?f, ?A..?F]),
- char(?})
- ]),
- # Short form: `\xHH`
- chars([?0..?9, ?a..?f, ?A..?F], 2)
- ])
- ]),
- Tok.string_escape
- ),
- # Unicode code points `\xHHHH`
- token(
- seq([
- lit("\\u"),
- chars([?0..?9, ?a..?f, ?A..?F], 4)
- ]),
- Tok.string_escape
- ),
- # Normal escape character
- token(
- seq([char(?\\), char()]),
- Tok.string_escape
- )
- ])
- )
-
- defrule interpol(
- seq([
- tag(:open,
- token(lit("\#{"), Tok.string_interpol)),
- tag(:middle,
- repeat(
- lookahead_not(lit("}")) |> root_element())),
- tag(:close,
- token(lit("}"), Tok.string_interpol))
- ])
- ), pipe_result_into: process_delimiter_groups
-
- defrule embed_interpol(
- alt([
- embed,
- interpol
- ])
- )
-
- defrule sigil_no_interpol(
- seq([
- tag(:open,
- lexeme(
- seq([
- char(?~),
- char(?A..?Z)
- ])
- )
- ),
-
- tag(:middle,
- alt([
- # Heredocs must come first.
- # Otherwise, the `"..."` will ensure that `"""..."""` never matches.
- # The same applies to `'...'` vs `'''...'''`
- #
- # Heredoc sigils - double quotes
- string_like("\"\"\"", embed, "\"\"\""),
- # Heredoc sigils - single quotes
- string_like("'''", embed, "'''"),
- # Other sigils can come afterwards
- string_like("{", embed, "}"),
- string_like("[", embed, "]"),
- string_like("(", embed, ")"),
- string_like("<", embed, ">"),
- string_like("/", embed, "/"),
- string_like("|", embed, "|"),
- string_like("'", embed, "'"),
- string_like("\"", embed, "\"")
- ])
- ),
-
- # Making this up because I can't find anything in the syntax reference.
- # These characters are the ones that seem to work.
- tag(:close,
- lexeme(
- chars([?a..?z, ?A..?Z], 0)
- )
- )
- ])
- ), pipe_result_into: sigil_to_tokens
-
- defp sigil_to_tokens([open: open, middle: parsed, close: modifiers])
- when open == "~R" or open == "~r" do
-
- [as_token(open, Tok.string_regex),
- string_like_to_tokens(parsed, Tok.string_regex),
- as_token(modifiers, Tok.string_regex)]
- end
-
- defp sigil_to_tokens([open: open, middle: parsed, close: modifiers])
- when open == "~S" or open == "~s" do
-
- [as_token(open, Tok.string),
- string_like_to_tokens(parsed, Tok.string),
- as_token(modifiers, Tok.string)]
- end
-
- defp sigil_to_tokens([open: other, middle: parsed, close: modifiers]) do
- [as_token(other, Tok.string_sigil),
- string_like_to_tokens(parsed, Tok.string_sigil),
- as_token(modifiers, Tok.string_sigil)]
- end
-
- defrule sigil_interpol(
- seq([
- tag(:open,
- lexeme(
- seq([
- char(?~),
- char(?a..?z)
- ])
- )
- ),
-
- tag(:middle,
- alt([
- # Heredocs must come first.
- # Otherwise, the `"..."` will ensure that `"""..."""` never matches.
- # The same applies to `'...'` vs `'''...'''`
- #
- # Heredoc sigils - double quotes
- string_like("\"\"\"", embed_interpol, "\"\"\""),
- # Heredoc sigils - single quotes
- string_like("'''", embed_interpol, "'''"),
- # Other sigils can come afterwards
- string_like("{", embed_interpol, "}"),
- string_like("[", embed_interpol, "]"),
- string_like("(", embed_interpol, ")"),
- string_like("<", embed_interpol, ">"),
- string_like("/", embed_interpol, "/"),
- string_like("|", embed_interpol, "|"),
- string_like("'", embed_interpol, "'"),
- string_like("\"", embed_interpol, "\"")
- ])
- ),
-
- # Making this up because I can't find anything in the syntax reference.
- # These characters are the ones that seem to work.
- tag(:close,
- lexeme(
- chars([?a..?z, ?A..?Z], 0)
- )
- )
- ])
- ), pipe_result_into: sigil_to_tokens
-
-
-
- # Heredocs
-
- defrule heredoc_double(
- string_like("\"\"\"", embed_interpol, "\"\"\"")
- ), pipe_result_into: string_like_to_tokens(Tok.string_double)
-
- defrule heredoc_single(
- string_like("'''", embed_interpol, "'''")
- ), pipe_result_into: string_like_to_tokens(Tok.string_single)
-
- # Strings and Character Lists
-
- defrule string(
- string_like("\"", embed_interpol, "\"")
- ), pipe_result_into: string_like_to_tokens(Tok.string_double)
-
- defrule charlist(
- string_like("'", embed_interpol, "'")
- ), pipe_result_into: string_like_to_tokens(Tok.string_single)
-
-
- # Elixir Operators and Tok.punctuation
- #
- # These rules use the convenient `words` macro to save some typing.
- # They should use a prefix tree, but it's probably not worth it.
-
- defrule operator_name(
- words(~W(
- <<< >>> ||| &&& ^^^ ~~~ === !== ~>> <~> |~> <|>
- == != <= >= && || \\ <> ++ -- |> =~ -> <- ~> <~
- = < > + - * / | . ^ & !
- ))
- )
-
- defrule operator(
- token(operator_name, Tok.operator)
- )
-
- defrule punctuation(
- token(
- words(~W{ \\\\ << >> => ( ) : ; , [ ] . % }),
- Tok.punctuation)
- )
-
- # ## Numbers
-
- # Binary numbers
- defrule number_bin(
- token(
- seq([
- lit("0b"),
- repeat(char(?0..?1), 1)]),
- Tok.number_bin)
- )
-
- # Octal numbers
- defrule number_oct(
- token(
- seq([
- lit("0o"),
- repeat(char(?0..?7), 1)]),
- Tok.number_oct)
- )
-
- # Hexadecimal numbers
- defrule number_hex(
- token(
- seq([
- lit("0x"),
- hex_digits1()]),
- Tok.number_hex)
- )
-
- # Normal base 10 integers
-
- defrule integer_part(
- seq([
- digits1(),
- repeat(
- seq([
- char(?_),
- digits1()
- ])
- )
- ])
- )
-
- defrule number_integer(
- token(integer_part, Tok.number_integer)
- )
-
- # Floating point numbers
- defrule float(
- # TODO: make it more strict.
- # Currently, this accepts things the elixir compiler doesn't
- token(
- seq([
- # Integer part (same as an integer)
- integer_part(),
- # Decimal point
- char(?.),
- # Fractional part (same as an integer)
- integer_part(),
- # (Optional) scientific notation
- repeat(
- seq([
- char([?e, ?E]),
- # Optional minus sign
- chars(?-, 0, 1),
- # Exponent
- integer_part()
- ]), 0, 1)]),
- Tok.number_float)
- )
-
- # ## Names
-
- defrule module_name_part(
- seq([
- char(?A..?Z),
- chars([?a..?z, ?A..?Z, ?_, ?0..?9], 0),
- repeat(char([??, ?!]), 0, 1)
- ])
- )
-
- defrule module_name(
- token(
- seq([
- module_name_part(),
- repeat(
- seq([
- char(?.),
- module_name_part()]))]),
- Tok.name_class)
- )
-
- defrule anon_function_arguments(
- token(
- seq([
- char(?&),
- chars(?0..?9)
- ]),
- Tok.name_entity)
- )
-
- defrule long_hex_char(
- token(
- seq([
- lit("?"),
- lit("\\x{"),
- hex_digits1(),
- lit("}")]),
- Tok.string_char)
- )
-
- defrule hex_char(
- token(
- seq([
- lit("?"),
- lit("\\x"),
- chars([?0..?9, ?a..?f, ?A..?F], 1, 2)]),
- Tok.string_char)
- )
-
- defrule normal_char(
- token(
- seq([
- char(??),
- char()
- ]),
- Tok.string_char
- )
- )
-
- defrule escape_char(
- token(
- seq([
- lit("?\\"),
- char()]),
- Tok.string_char)
- )
-
- defrule triple_colon(
- token(lit(":::"), Tok.operator)
- )
-
- defrule double_colon(
- token(lit("::"), Tok.operator)
- )
-
- # special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})'
- defrule special_atom_name(
- alt([
- lit("..."),
- lit("<<>>"),
- lit("%{}"),
- lit("%{"),
- lit("%"),
- lit("{}")])
- )
-
- defrule special_atom(
- token(
- seq([
- char(?:),
- special_atom_name
- ]),
- Tok.string_symbol
- )
- )
-
- defrule normal_atom(
- token(
- seq([
- char(?:),
- alt([
- operator_name(),
- name_part()
- ])
- ]),
- Tok.string_symbol)
- )
-
- defrule complex_atom(
- seq([
- token(char(?:), Tok.string_symbol),
- tag(:string,
- alt([
- string_like("'", embed_interpol, "'"),
- string_like("\"", embed_interpol, "\"")
- ])
- )
- ])
- ), pipe_result_into: complex_atom_to_toks
-
- defp complex_atom_to_toks([start, string: string]) do
- [start, string_like_to_tokens(string, Tok.string_symbol)]
- end
-
- defrule name_part(
- seq([
- lowercase_letter_(),
- alphanums_(),
- #char([?a..?z, ?_]),
- #word_chars(),
- repeat(char([??, ?!]), 0, 1)
- ])
- )
-
- defrule name(
- token(name_part(), Tok.name)
- )
-
- defrule attribute(
- token(
- seq([
- char(?@),
- name_part()]),
- Tok.name_attribute)
- )
-
- defrule complex_name(
- alt([name, module_name, operator])
- )
-
- defrule keyword(
- seq([
- token(
- alt([
- special_atom_name(),
- complex_name()]),
- Tok.string_symbol),
- token(
- char(?:) |> lookahead(space()),
- Tok.punctuation)
- ])
- )
-
- defrule inline_comment(
- token(
- seq([
- char(?#),
- repeat(
- lookahead_not(char(?\n))
- |> lookahead_not(eoi())
- |> char()),
- alt([
- eoi(),
- char(?\n)])]),
- Tok.comment_single)
- )
-
- # iex(1)>
- # iex>
- defrule iex_prompt(
- token(
- seq([
- alt([
- lit("iex"),
- lit("...")
- ]),
- repeat(
- seq([
- lit("("),
- digits1(),
- lit(")")
- ]), 0, 1),
- lit(">")]),
- Tok.generic_prompt)
- )
-
- # Matching delimiters
- # - parenthesis - ()
- # - tuple: {}
- # - straight brackets - []
- # - binaries - <<>>
- # - map - %{}
- # - struct %Module{}
-
- defp process_delimiter_groups([open: open, middle: middle, close: close]) do
- # Generate unique value
- uid = unique_value()
- processed =
- # Mark the opening tag as belonging to the group `uid`
- (open |> List.wrap |> List.flatten |> as_group(uid)) ++
- # No need to anything to the middle part
- middle ++
- # Mark the closing tag as belonging to the group `uid`
- (close |> List.wrap |> List.flatten |> as_group(uid))
- processed
- end
-
- defrule tuple(
- seq([
- tag(:open,
- token(lit(?{), Tok.punctuation)
- ),
- tag(:middle,
- repeat(lookahead_not(lit(?})) |> root_element())
- ),
- tag(:close,
- token(lit(?}), Tok.punctuation)
- )
- ])
- ), pipe_result_into: process_delimiter_groups
-
-
- defrule struct_(
- seq([
- tag(:open,
- seq([
- token(char(?%), Tok.punctuation),
- module_name(),
- token(char(?{), Tok.punctuation)])),
- tag(:middle,
- repeat(lookahead_not(char(?})) |> root_element())),
- tag(:close,
- token(char(?}), Tok.punctuation))
- ])
- ), pipe_result_into: process_delimiter_groups
-
-
- defrule map(
- seq([
- tag(:open,
- token(lit("%{"), Tok.punctuation)),
- tag(:middle,
- repeat(lookahead_not(char(?})) |> root_element())),
- tag(:close,
- token(char(?}), Tok.punctuation))])
- ), pipe_result_into: process_delimiter_groups
-
-
- defrule parens(
- seq([
- tag(:open,
- token(char(?(), Tok.punctuation)),
- tag(:middle,
- repeat(
- lookahead_not(lit(?))) |> root_element())),
- tag(:close,
- token(char(?)), Tok.punctuation))])
- ), pipe_result_into: process_delimiter_groups
-
-
- defrule list(
- seq([
- tag(:open,
- token(lit(?[), Tok.punctuation)),
- tag(:middle,
- repeat(lookahead_not(lit(?])) |> root_element())),
- tag(:close,
- token(lit(?]), Tok.punctuation))])
- ), pipe_result_into: process_delimiter_groups
-
-
- defrule binary(
- seq([
- tag(:open,
- token(lit("<<"), Tok.punctuation)),
- tag(:middle,
- repeat(lookahead_not(lit(">>")) |> root_element())),
- tag(:close,
- token(lit(">>"), Tok.punctuation))])
- ), pipe_result_into: process_delimiter_groups
-
-
- defrule block_keyword(
- seq([
- alt([
- lit("else"),
- lit("catch"),
- lit("rescue"),
- lit("after"),
- lit("end")
- ]),
- lookahead_not(alphanum_())
- ])
- )
-
- defrule block_keyword_middle(
- seq([
- alt([
- lit("else"),
- lit("catch"),
- lit("rescue"),
- lit("after")
- ]),
- lookahead_not(alphanum_())
- ])
- )
-
- defrule end_keyword(
- lit("end") |> lookahead_not(alphanum_())
- )
-
- defrule do_block(
- seq([
- tag(:keyword,
- token(lit("do") |> lookahead_not(alphanum_()), Tok.name)),
- tag(:normal,
- repeat(
- lookahead_not(block_keyword())
- |> root_element(), 1)),
- repeat(
- seq([
- tag(:keyword,
- token(block_keyword_middle(), Tok.name)),
- tag(:normal,
- repeat(
- lookahead_not(block_keyword())
- |> root_element(), 1))
- ])),
- tag(:keyword,
- token(end_keyword(), Tok.name))
- ])
- ), pipe_result_into: process_do_block
-
- defp process_do_block(results) do
- uid = unique_value()
- results |> List.flatten |> Enum.map(&tag_do_block_element(&1, uid))
- end
-
- defp tag_do_block_element({:keyword, keyword}, uid), do: as_group(keyword, uid)
- defp tag_do_block_element({:normal, toks}, _), do: toks
-
-
- defrule fn_end(
- seq([
- tag(:open,
- token(lit("fn") |> lookahead_not(alphanum_()), Tok.name)),
- tag(:middle,
- repeat(
- lookahead_not(end_keyword())
- |> root_element(),
- 1)),
- tag(:close,
- token(end_keyword(), Tok.name))
- ])
- ), pipe_result_into: process_delimiter_groups
-
-
-
- defrule any_char(
- token(char(), Tok.error)
- )
-
- defrule whitespace(
- token(spaces1(), Tok.whitespace)
- )
-
- defrule root_element(
- alt([
- # Whitespaces
- whitespace(),
- # Comment
- inline_comment(),
- # iex prompt
- iex_prompt(),
- # Chars
- long_hex_char(),
- hex_char(),
- escape_char(),
- normal_char(),
- # Some operators (must come before the atoms)
- triple_colon(),
- double_colon(),
- # Atoms
- special_atom(),
- complex_atom(),
- normal_atom(),
- # missing: atoms with single or double quotes
- # Module attributes
- attribute(),
- # Keywords syntax sugar (must come before names)
- keyword(),
- # Do block (must come before names)
- do_block(),
- # fn ... end (must also come before names)
- fn_end(),
- # Name
- name(),
- # Module
- module_name(),
- # Anonymous function arguments (must come before the operators)
- anon_function_arguments(),
- # Maps, tuples and structs must be matched before punctuation and operators
- struct_(),
- map(),
- tuple(),
- parens(),
- list(),
- binary(),
- # Operators
- operator(),
- # Punctuation
- punctuation(),
- # Numbers
- number_bin(),
- number_oct(),
- number_hex(),
- # Floats must come before integers
- float(),
- number_integer(),
- # Sigils (regexs are a special case of sigils)
- sigil_no_interpol(),
- sigil_interpol(),
- # Heredocs must come before strings
- heredoc_double(),
- heredoc_single(),
- # Strings and charlists (must come after the heredocs)
- string(),
- charlist(),
- any_char()
- ])
- )
-
- @keywords MapSet.new(~W[fn do end after else rescue catch with])
-
- @keyword_operators MapSet.new(~W[not and or when in])
-
- @builtin MapSet.new(~W[
- case cond for if unless try receive raise
- quote unquote unquote_splicing throw super])
-
- @builtin_declaration MapSet.new(~W[
- def defp defmodule defprotocol defmacro defmacrop
- defdelegate defexception defstruct defimpl defcallback])
-
- @builtin_namespace MapSet.new(~W[import require use alias])
-
- @constant MapSet.new(~W[nil true false])
-
- @pseudovar MapSet.new(~W[_ __MODULE__ __DIR__ __ENV__ __CALLER__])
-
- defp postprocess({:name, meta, value}) do
- name = cond do
- MapSet.member?(@keywords, value) -> Tok.keyword
- MapSet.member?(@keyword_operators, value) -> Tok.operator_word
- MapSet.member?(@builtin, value) -> Tok.keyword
- MapSet.member?(@builtin_declaration, value) -> Tok.keyword_declaration
- MapSet.member?(@builtin_namespace, value) -> Tok.keyword_namespace
- MapSet.member?(@constant, value) -> Tok.name_constant
- MapSet.member?(@pseudovar, value) -> Tok.name_builtin_pseudo
- true -> :name
- end
- {name, meta, value}
- end
- defp postprocess(token), do: token
-
- # The root rule for outer grammar
- defrule root(
- repeat(root_element())
- # Turn the list of list into a flat list of `{text, tag}` pairs.
- ), pipe_result_into: List.flatten
-
- def lex(source, merge \\ true) do
- raw = parse(source, root).result
- maybe_merged = case merge do
- false -> raw
- true -> Makeup.Lexer.merge(raw)
- end
- Enum.map(maybe_merged, &postprocess/1)
- end
-
- end
\ No newline at end of file
removed lib/makeup/lexers/html5_lexer.ex

@@ -1,325 +0,0 @@
- defmodule Makeup.Lexers.HTML5Lexer do
- @moduledoc """
- Documentation for the HTML5 lexer.
-
- Unlike the Pygments lexer that served as an inspiration (which uses Regexs),
- we try to make a proper job out of parsing the HTML and extract some structure.
-
- We *will* match opening and closing tags, and will make use of those tags to
- add some functionality.
- """
-
- use ExSpirit.Parser, text: true
-
- import Makeup.Lexer.Common
- # This is required due to some speed optimizations in ExSpirit
- require Makeup.Lexer.Common.Macros
-
- require Makeup.Token.TokenTypes
- alias Makeup.Token.TokenTypes, as: Tok
-
- alias Makeup.Lexer.Common.ASCII
- require ASCII
- import ASCII, only: [spaces: 1, spaces1: 1]
-
- # Still missing:
- # - write CSS lexer
- # - write javascript lexer
-
-
- defrule entity(
- token(
- seq([
- char([?%, ?&]),
- chars(-?;),
- char(?;)
- ]),
- Tok.name_entity
- )
- )
-
- # The next four rules are basically the same,
- # but with more descriptive names.
- defrule extended_tag_name(
- chars([?A..?Z, ?a..?z, ?0..?9, ?:, ?!, ?., ?-, ?/, ?%])
- )
-
- defrule extended_attribute_name(
- chars([?A..?Z, ?a..?z, ?0..?9, ?:, ?!, ?., ?-, ?/, ?%])
- )
-
- defrule unquoted_attribute_value(
- chars([?A..?Z, ?a..?z, ?0..?9, ?:, ?!, ?., ?-, ?/, ?%])
- )
-
- defrule extended_identifier(
- chars([?A..?Z, ?a..?z, ?0..?9, ?:, ?!, ?., ?-, ?/, ?%])
- )
-
- # HTML comment. HTML comments can't be nested.
- defrule comment(
- token(string_like("<!--", "-->"), Tok.comment)
- )
-
- defrule whitespace(
- token(spaces1(), Tok.whitespace)
- )
-
- defrule string_double(
- token(string_like("\"", "\""), Tok.string_double)
- )
-
- defrule string_single(
- token(string_like("'", "'"), Tok.string_single)
- )
-
- # HTML attribute. Recognizes the following formats:
- #
- # * `key` (e.g. ` <input type="text" disabled>`)
- # * `key="value"` (e.g. <script src="myscript.js"></script>)
- # * `key='value'` (e.g. <script src='myscript.js'></script>)
- # * `key=value` (e.g. `<a href=https://www.w3schools.com></a>`)
- defrule attribute(
- seq([
- # Attribute_name
- token(extended_attribute_name(), Tok.name_attribute),
- # Optionally:
- repeat(
- seq([
- token(spaces(), Tok.text),
- # Equals sign
- token(lit(?=), Tok.punctuation),
- token(spaces(), Tok.text),
- # Attribute value. Can be a string or "naked" extended identifier
- alt([
- string_double(),
- string_single(),
- token(unquoted_attribute_value(), Tok.string)
- ]),
- token(spaces(), Tok.text)
- ]), 0, 1)
- ])
- )
-
- # Normal text inside a tag
- defrule document_text(
- repeat(
- alt([
- entity(),
- token(chars([-?<, -?&]), Tok.text)
- ]), 1)
- )
-
- # Fallback for the cases where the lexer can't recognize anything else
- defrule any_char(
- token(char(), Tok.error)
- )
-
- # Top level nodes inside the Root of an HTML
- defrule root_node(
- alt([
- whitespace(),
- comment(),
- xml_self_closing_tag(),
- html_tag(),
- html_self_closing_tag(),
- document_text(),
- any_char()
- ])
- )
-
- # The Root rule to lex an HTML document
- defrule root(
- repeat(root_node())
- ), pipe_result_into: List.flatten()
-
- # Self-closing XML tags: `<tag .../>`
- defrule xml_self_closing_tag(
- seq([
- token(char(?<), Tok.punctuation),
- token(spaces(), Tok.text),
- token(extended_tag_name() |> put_state(:tag_name, :result), Tok.name_tag),
- token(spaces(), Tok.text),
- repeat(
- seq([
- token(spaces1(), Tok.text),
- attribute()
- ])
- ),
- token(char(?/), Tok.punctuation),
- token(spaces(), Tok.text),
- token(char(?>), Tok.punctuation)
- ])
- ), pipe_result_into: List.flatten
-
-
- # Self-closing tags for HTML: `<img ...>`, `<link ...>`, etc.
- # Actually, we accept any tag.
- defrule html_self_closing_tag(
- seq([
- token(char(?<), Tok.punctuation),
- token(spaces(), Tok.text),
- token(extended_tag_name(), Tok.name_tag),
- repeat(
- seq([
- token(spaces1(), Tok.text),
- attribute()
- ])
- ),
- token(spaces(), Tok.text),
- token(char(?>), Tok.punctuation)
- ])
- ), pipe_result_into: List.flatten |> drop_empty
-
- # Recognizes matching opening and closing tags and their content:
- # `<tag ...>...</tag>`
- #
- # It's the most complex rule in the file.
- # Its design of this rule deserves some explanation.
- #
- # We want to treat the open and close tags differently from the content.
- # The easiest way to do it is by tagging the parse results using the `tag` macro.
- # The first three tags are:
- #
- # * `:open` - contains the opening tag and it's attributes `</tag ...>`
- # * `:middle` - contains the tag contents (child tags, text, etc.)
- # * `:close` - contains the closing tag (i.e. `</tag>`)
- #
- # These three tags are enough if all we want is to know the tokens that
- # belong to the opening and closing tags (to mark them as belonging
- # to the same group, for example).
- #
- # But we want to do something a little more advanced.
- #
- # We want to render the text according to the containing element.
- # For example, we would like the contents of the `<strong></strong>`
- # tag to render as bold text.
- # For this we need more information than what we have above.
- # We need the tag name.
- #
- # To make it easier for the postprocessor to find the tag name,
- # we will add a fourth tag (`:tag_name`) from which we can extract the tag name.
- defrule html_tag(
- seq([
- # **First** tag: the opening delimiter
- tag(
- :open,
- seq([
- token(char(?<), Tok.punctuation),
- token(spaces(), Tok.text),
- # Recognize the tag name, and put it in the state, because we'll need it later
- # to match the closing tag.
- token(extended_tag_name() |> put_state(:tag_name, :result), Tok.name_tag),
- # Match the attributes
- repeat(
- seq([
- token(spaces1(), Tok.text),
- attribute()
- ])
- ),
- token(char(?>), Tok.punctuation)
- ])
- ),
- # **Second** tag: the element contents
- tag(:middle,
- repeat(
- lookahead_not(
- seq([
- token(char(?<), Tok.punctuation),
- token(spaces(), Tok.text),
- token(char(?/), Tok.punctuation),
- token(spaces(), Tok.text),
- get_state_into(:tag_name, token(lit(&1), Tok.name_tag)),
- token(spaces(), Tok.text),
- token(char(?>), Tok.punctuation)
- ])
- ) |> root_node
- )
- ),
- # **Third* tag: the closing tag
- tag(
- :close,
- seq([
- token(char(?<), Tok.punctuation),
- token(spaces(), Tok.text),
- token(char(?/), Tok.punctuation),
- token(spaces(), Tok.text),
- # Get the tag name from the state and try to match it
- get_state_into(:tag_name, token(lit(&1), Tok.name_tag)),
- token(spaces(), Tok.text),
- token(char(?>), Tok.punctuation)
- ])
- ),
- # ** Fourth** tag: the `tag_name`.
- # Above, we have put it inside the state.
- # The easiest way to make it available fo the function we're going to pipe this into
- # is to get the state into a parser that returns a constant string.
- # This will be the `success` parser.
- tag(:tag_name, get_state_into(:tag_name, success(&1)))
- ])
- # Now we've created an ssociation list which we can desconstruct
- # and process.
- ), pipe_result_into: (fn [open: open,
- middle: middle,
- close: close,
- tag_name: tag_name] ->
- # Generate a unique value.
- # This value is random, so it makes the output of the lexer non-deterministic.
- # The value will be unique for each BEAM run.
- # This degree of uniqueness is probably good enough for our purposes.
- uid = unique_value()
-
- new_result =
- # Mark the opening tag as belonging to the group `uid`
- (open |> List.flatten |> drop_empty |> as_group(uid)) ++
- # Apply markup to text that has no other markup.
- # Does not override the markup set by child tags.
- (middle |> List.flatten |> apply_markup_to_text(tag_name)) ++
- # Mark the closing tag as belonging to the group `uid`
- (close |> drop_empty |> as_group(uid))
-
- new_result
- end).()
-
- defp apply_markup_to_text(tokens, tag_name) do
- # Look up the correct markup for our tag
- case markup_for_html_tag(tag_name) do
- # If no markup is to be set, return the tokens unchanged.
- nil -> tokens
- # Otherwise, set the markup
- new_tag -> Enum.map(tokens, &(change_token_tag(&1, new_tag)))
- end
- end
-
- # For each HTML tag, return the markup for the child tags
- defp markup_for_html_tag("b"), do: Tok.generic_strong
- defp markup_for_html_tag("strong"), do: Tok.generic_strong
- defp markup_for_html_tag("i"), do: Tok.generic_emph
- defp markup_for_html_tag("em"), do: Tok.generic_emph
- defp markup_for_html_tag("s"), do: Tok.generic_deleted
- defp markup_for_html_tag("kbd"), do: Tok.generic_output
- defp markup_for_html_tag("h1"), do: Tok.generic_heading
- defp markup_for_html_tag("h2"), do: Tok.generic_subheading
- defp markup_for_html_tag("h3"), do: Tok.generic_subheading
- defp markup_for_html_tag("h4"), do: Tok.generic_subheading
- defp markup_for_html_tag("h5"), do: Tok.generic_subheading
- defp markup_for_html_tag("h6"), do: Tok.generic_subheading
- defp markup_for_html_tag(_), do: nil
-
- defp change_token_tag({old_token_tag, meta, value} = old_token, new_token_tag) do
- case old_token_tag do
- Tok.text -> {new_token_tag, meta, value}
- _ -> old_token
- end
- end
-
-
- def lex(source, merge \\ true) do
- raw = parse(source, root).result
- case merge do
- false -> raw
- true -> Makeup.Lexer.merge(raw)
- end
- end
-
- end
\ No newline at end of file
changed mix.exs

@@ -1,10 +1,10 @@
- defmodule Sandbox.Mixfile do
+ defmodule Makeup.Mixfile do
  use Mix.Project

  def project do
  [
  app: :makeup,
- version: "0.1.3",
+ version: "0.2.0",
  elixir: "~> 1.0",
  start_permanent: Mix.env == :prod,
  deps: deps(),