Commit | Line | Data |
---|---|---|
60757f4b PP |
1 | #!/usr/bin/env python3 |
2 | ||
3 | # The MIT License (MIT) | |
4 | # | |
5 | # Copyright (c) 2015 Philippe Proulx <pproulx@efficios.com> | |
6 | # | |
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy | |
8 | # of this software and associated documentation files (the "Software"), to deal | |
9 | # in the Software without restriction, including without limitation the rights | |
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
11 | # copies of the Software, and to permit persons to whom the Software is | |
12 | # furnished to do so, subject to the following conditions: | |
13 | # | |
14 | # The above copyright notice and this permission notice shall be included in | |
15 | # all copies or substantial portions of the Software. | |
16 | # | |
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
23 | # THE SOFTWARE. | |
24 | ||
25 | import re | |
26 | import os | |
27 | import sys | |
28 | import json | |
29 | from termcolor import colored | |
30 | ||
31 | ||
# Table-of-contents file scanned for section IDs (relative to the
# current working directory).
_TOC_PATH = 'toc/docs.yml'
# Directory walked recursively to collect the content files to check.
_CONTENTS_ROOT_PATH = 'contents'
34 | ||
35 | ||
class _Link:
    """Common base type of the link kinds recorded for a section."""
38 | ||
39 | ||
class _IntLink(_Link):
    """Link pointing at another documentation section, keyed by its ID."""

    def __init__(self, section):
        self._section = section

    @property
    def section(self):
        """ID of the section this link points to."""
        return self._section

    def __eq__(self, other):
        # Only links of exactly the same class can compare equal.
        same_kind = type(self) is type(other)
        return same_kind and self._section == other._section

    def __hash__(self):
        # Hash on the same field used by __eq__ so links deduplicate in sets.
        return hash(self._section)

    def to_json(self):
        """Return the JSON-serializable form of this link."""
        return {'section': self._section}
61 | ||
62 | ||
class _ExtLink(_Link):
    """Link pointing outside the documentation, keyed by its URL."""

    def __init__(self, url):
        self._url = url

    @property
    def url(self):
        """Target URL of this link."""
        return self._url

    def __eq__(self, other):
        # Only links of exactly the same class can compare equal.
        same_kind = type(self) is type(other)
        return same_kind and self._url == other._url

    def __hash__(self):
        # Hash on the same field used by __eq__ so links deduplicate in sets.
        return hash(self._url)

    def to_json(self):
        """Return the JSON-serializable form of this link."""
        return {'url': self._url}
84 | ||
85 | ||
class _SectionInfo:
    """Links entering and leaving one documentation section.

    Both link collections are sets, so adding an equal link twice keeps a
    single entry.
    """

    def __init__(self, path):
        self._path = path
        self._in_links = set()
        self._out_links = set()

    @property
    def path(self):
        """Path given at construction time."""
        return self._path

    @property
    def in_links(self):
        """Set of links that point to this section."""
        return self._in_links

    @property
    def out_links(self):
        """Set of links found in this section."""
        return self._out_links

    def add_in_link(self, link):
        """Record `link` as pointing to this section."""
        self._in_links.add(link)

    def add_out_link(self, link):
        """Record `link` as leaving this section."""
        self._out_links.add(link)

    def to_json(self):
        """Return a JSON-serializable dict describing this section."""
        return {
            'path': self.path,
            'in-links': [link.to_json() for link in self.in_links],
            'out-links': [link.to_json() for link in self.out_links],
        }
127 | ||
128 | ||
class _Registry:
    """Collection of section infos, keyed by section ID."""

    def __init__(self):
        self._section_infos = {}

    def register_section_info(self, sid, section_info):
        """Store `section_info` under `sid`, replacing any previous entry."""
        self._section_infos[sid] = section_info

    def _resolve_in_links(self):
        """Derive the incoming links of every section from the outgoing ones.

        For each internal outgoing link A -> B, an internal link to A is
        added to B's incoming links. Incoming links are stored in sets, so
        running this more than once does not create duplicates.
        """
        for sid, section_info in self._section_infos.items():
            for out_link in section_info.out_links:
                if not isinstance(out_link, _IntLink):
                    continue

                target_sid = out_link.section
                target_section_info = self._section_infos.get(target_sid)

                if target_section_info is None:
                    # The target ID was never registered (for example, it
                    # is listed in the TOC but no content file declares
                    # it). Report it instead of crashing with a KeyError.
                    msg = 'Internal link target has no registered section: "{}"'
                    _pwarn(section_info.path, msg.format(target_sid))
                    continue

                target_section_info.add_in_link(_IntLink(sid))

    def to_json(self):
        """Return all registered sections as a JSON string.

        The result is a JSON object mapping each section ID to the output
        of its section info's `to_json()`. Incoming links are resolved
        first.
        """
        self._resolve_in_links()
        sections_json = {
            sid: section_info.to_json()
            for sid, section_info in self._section_infos.items()
        }

        return json.dumps(sections_json)
155 | ||
156 | ||
def _perror(filename, msg):
    """Print a red error line about `filename` to standard error."""
    label = colored('Error:', 'red')
    body = colored(msg, 'red', attrs=['bold'])
    print('{} {} {}'.format(filename, label, body), file=sys.stderr)
161 | ||
162 | ||
def _pwarn(filename, msg):
    """Print a yellow warning line about `filename` to standard error."""
    label = colored('Warning:', 'yellow')
    body = colored(msg, 'yellow', attrs=['bold'])
    print('{} {} {}'.format(filename, label, body), file=sys.stderr)
167 | ||
168 | ||
def _get_files(root):
    """Return the sorted paths of all files found recursively under `root`.

    Each path is the walked directory path joined with the file name, so
    it starts with `root`.
    """
    return sorted(
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(root)
        for name in filenames
    )
177 | ||
178 | ||
def _get_toc_ids(path):
    """Return the set of section IDs declared in the TOC file at `path`.

    Every `id: VALUE` entry is collected; VALUE runs to the end of the
    line and is stripped of surrounding whitespace. If any ID appears more
    than once, an error naming the duplicates is printed and None is
    returned.
    """
    # `\b` keeps keys that merely end in "id" (such as `grid:`) from being
    # mistaken for an ID entry.
    id_re = re.compile(r'\bid\s*:\s*(.+)$', flags=re.M)

    with open(path, encoding='utf-8') as f:
        raw_ids = id_re.findall(f.read())

    # Trailing whitespace would otherwise become part of the ID and make
    # lookups of the clean ID fail.
    orig_ids = [stripped for stripped in (raw.strip() for raw in raw_ids)
                if stripped]

    ids = set()
    duplicates = set()

    for toc_id in orig_ids:
        if toc_id in ids:
            duplicates.add(toc_id)

        ids.add(toc_id)

    if duplicates:
        names = ', '.join(sorted(duplicates))
        _perror(path, 'Duplicate IDs: {}'.format(names))
        return None

    return ids
192 | ||
193 | ||
# Matches a line consisting only of `id: VALUE`, where VALUE is made of
# letters, digits, underscores and dashes.
_id_re = re.compile(r'^\s*id:\s*([a-zA-Z0-9_-]+)\s*$', flags=re.M)


def _get_sid_from_file(path, c):
    """Return the section ID declared in the file contents `c`.

    The first `id: VALUE` line wins. When there is none, an error is
    printed for `path` and None is returned.
    """
    match = _id_re.search(c)

    if match:
        return match.group(1)

    _perror(path, 'No ID found')
    return None
205 | ||
206 | ||
# Markdown link `[text](target)`; captures the target.
_ilink_re = re.compile(r'\[[^\]]+\]\(([^)]+)\)', flags=re.M)
# Opening HTML `<a ...>` tag; the attribute patterns below are applied to
# the matched tag text.
_elink_re = re.compile(r'<a(?:\s+[^>]+|\s*)>')
_name_re = re.compile(r'name="([^"]+)"')
_href_re = re.compile(r'href="([^"]+)"')
_classes_re = re.compile(r'class="([^"]+)"')


def _register_section_info(registry, toc_ids, path, c):
    """Check the links of one content file and register its section.

    Markdown links, and `<a>` tags carrying the "int" class, are treated
    as internal links: each must start with "#doc-" and name an ID present
    in `toc_ids`. Other `<a>` tags with an `href` are recorded as external
    links. Problems are reported on standard error.

    Parameters:
        registry: object whose `register_section_info(sid, info)` receives
            the resulting section info.
        toc_ids: container of the known section IDs.
        path: path of the content file, used in messages and stored in the
            section info.
        c: text contents of the file.

    Returns:
        True when no error was reported, False otherwise. When the file
        declares no ID, nothing is registered and False is returned.
    """
    sid = _get_sid_from_file(path, c)

    if not sid:
        return False

    ret = True
    ilinks = _ilink_re.findall(c)
    section_info = _SectionInfo(path)

    for link in _elink_re.findall(c):
        href_match = _href_re.search(link)
        name_match = _name_re.search(link)
        classes_match = _classes_re.search(link)

        if name_match and not href_match:
            # simple anchor
            continue

        if classes_match is None:
            # One warning is enough here: a tag without any class
            # necessarily lacks both "ext" and "int".
            _pwarn(path, 'External link has no "ext" class: "{}"'.format(link))
            classes = []
        else:
            # split() without argument tolerates repeated whitespace
            # between class names.
            classes = classes_match.group(1).split()

            if 'int' in classes and 'ext' in classes:
                _pwarn(path, 'External link has both "ext" and "int" classes: "{}"'.format(link))
            elif 'int' not in classes and 'ext' not in classes:
                _pwarn(path, 'External link has no "ext" or "int" class: "{}"'.format(link))

        if href_match is None:
            # No `name` either, otherwise the tag was skipped above.
            _perror(path, 'External link with no "href" or "name" attribute: "{}"'.format(link))
            ret = False
            continue

        href = href_match.group(1)

        if 'int' in classes:
            # Validated together with the Markdown links below.
            ilinks.append(href)
            continue

        if href.startswith('#'):
            _pwarn(path, 'External link starts with #: "{}"'.format(href))

        section_info.add_out_link(_ExtLink(href))

    prefix = '#doc-'

    for link in ilinks:
        if not link.startswith(prefix):
            s = 'Internal link does not start with "#doc-": "{}"'.format(link)
            _perror(path, s)
            ret = False
            continue

        target_sid = link[len(prefix):]

        if target_sid not in toc_ids:
            _perror(path, 'Dead internal link: "{}"'.format(link))
            ret = False
        else:
            section_info.add_out_link(_IntLink(target_sid))

    registry.register_section_info(sid, section_info)

    return ret
278 | ||
279 | ||
def _docs2json(toc_ids, contents_files):
    """Check every content file and print the link registry as JSON.

    Each file in `contents_files` is read and passed to
    `_register_section_info()`. The registry's JSON is printed to standard
    output even when some files reported errors.

    Returns:
        True when every file was processed without error, False otherwise.
    """
    ret = True
    registry = _Registry()

    for path in contents_files:
        # Read as UTF-8 explicitly so the result does not depend on the
        # locale of the machine running the check.
        with open(path, encoding='utf-8') as f:
            c = f.read()

        if not _register_section_info(registry, toc_ids, path, c):
            ret = False

    print(registry.to_json())

    return ret
295 | ||
296 | ||
def _check_non_md(files):
    """Report every path in `files` that does not end with ".md".

    Returns True when all paths end with ".md", False otherwise.
    """
    offenders = [f for f in files if not f.endswith('.md')]

    for f in offenders:
        _perror(f, 'Wrong, non-Markdown file: "{}"'.format(f))

    return not offenders
306 | ||
307 | ||
def docs2json():
    """Run the whole check and JSON export; return True on success.

    Steps, stopping at the first failure: read the TOC IDs, collect the
    content files, reject non-Markdown files, then check the links and
    print the registry.
    """
    toc_ids = _get_toc_ids(_TOC_PATH)

    if toc_ids is None:
        return False

    contents_files = _get_files(_CONTENTS_ROOT_PATH)

    if _check_non_md(contents_files):
        if _docs2json(toc_ids, contents_files):
            return True

    return False
323 | ||
324 | ||
if __name__ == '__main__':
    # Exit status 0 on success, 1 when any check failed.
    succeeded = docs2json()
    sys.exit(0 if succeeded else 1)