# Create docs2json.py and remove checkdocs.py
# [lttng-docs.git] / tools / docs2json.py
#!/usr/bin/env python3

# The MIT License (MIT)
#
# Copyright (c) 2015 Philippe Proulx <pproulx@efficios.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
24
25 import re
26 import os
27 import sys
28 import json
29 from termcolor import colored
30
31
# Table-of-contents file scanned for section IDs. It is opened exactly as
# written here, so it is resolved against the current working directory.
_TOC_PATH = 'toc/docs.yml'

# Directory walked recursively to collect the contents files.
_CONTENTS_ROOT_PATH = 'contents'
34
35
class _Link:
    """Common base type of the link kinds recorded for a section."""
38
39
class _IntLink(_Link):
    """Link to another section, identified by that section's ID."""

    def __init__(self, section):
        self._section = section

    @property
    def section(self):
        """ID of the section this link refers to."""
        return self._section

    def __eq__(self, other):
        # Exact-type comparison: an object of any other class (subclasses
        # included) never compares equal to this link.
        return type(self) is type(other) and self._section == other._section

    def __hash__(self):
        # Hash on the same field `__eq__` compares so that equal links
        # collapse to one entry when stored in a set.
        return hash(self._section)

    def to_json(self):
        """Return this link as a JSON-serializable dict."""
        return dict(section=self._section)
61
62
class _ExtLink(_Link):
    """Link to an external resource, identified by its URL."""

    def __init__(self, url):
        self._url = url

    @property
    def url(self):
        """URL this link refers to."""
        return self._url

    def __eq__(self, other):
        # Exact-type comparison: an object of any other class (subclasses
        # included) never compares equal to this link.
        return type(self) is type(other) and self._url == other._url

    def __hash__(self):
        # Hash on the same field `__eq__` compares so that equal links
        # collapse to one entry when stored in a set.
        return hash(self._url)

    def to_json(self):
        """Return this link as a JSON-serializable dict."""
        return dict(url=self._url)
84
85
class _SectionInfo:
    """A section's path together with its incoming and outgoing links.

    Both link collections are sets, so adding an equal link twice keeps a
    single entry.
    """

    def __init__(self, path):
        self._path = path
        self._in_links = set()
        self._out_links = set()

    @property
    def path(self):
        """Path given when this object was created."""
        return self._path

    @property
    def in_links(self):
        """Set of links pointing at this section."""
        return self._in_links

    @property
    def out_links(self):
        """Set of links found in this section."""
        return self._out_links

    def add_in_link(self, link):
        """Record `link` as pointing at this section."""
        self._in_links.add(link)

    def add_out_link(self, link):
        """Record `link` as originating from this section."""
        self._out_links.add(link)

    def to_json(self):
        """Return a JSON-serializable dict describing this section.

        Each link is converted with its own `to_json()` method. The lists
        follow set iteration order.
        """
        return {
            'path': self.path,
            'in-links': [link.to_json() for link in self.in_links],
            'out-links': [link.to_json() for link in self.out_links],
        }
127
128
class _Registry:
    """Collection of section infos keyed by section ID."""

    def __init__(self):
        self._section_infos = {}

    def register_section_info(self, sid, section_info):
        """Store `section_info` under `sid`, replacing any previous entry."""
        self._section_infos[sid] = section_info

    def _resolve_in_links(self):
        """Derive every section's in-links from the registered out-links.

        For each internal out-link of section A targeting section B, an
        internal link back to A is added to B's in-links. External links
        are ignored.
        """
        for sid, section_info in self._section_infos.items():
            for out_link in section_info.out_links:
                if not isinstance(out_link, _IntLink):
                    continue

                # The target may be a section that was never registered
                # (no entry for that ID); indexing the dict directly would
                # raise KeyError, so such links are skipped here.
                target_section_info = self._section_infos.get(out_link.section)

                if target_section_info is None:
                    continue

                target_section_info.add_in_link(_IntLink(sid))

    def to_json(self):
        """Return all registered sections as a JSON string.

        In-links are resolved first, so the output of each section includes
        the links pointing at it.
        """
        self._resolve_in_links()
        sections_json = {
            sid: section_info.to_json()
            for sid, section_info in self._section_infos.items()
        }

        return json.dumps(sections_json)
155
156
def _perror(filename, msg):
    """Print an error line for `filename` on standard error.

    The "Error:" label is colored red and the message bold red.
    """
    label = colored('Error:', 'red')
    text = colored(msg, 'red', attrs=['bold'])
    print('{} {} {}'.format(filename, label, text), file=sys.stderr)
161
162
def _pwarn(filename, msg):
    """Print a warning line for `filename` on standard error.

    The "Warning:" label is colored yellow and the message bold yellow.
    """
    label = colored('Warning:', 'yellow')
    text = colored(msg, 'yellow', attrs=['bold'])
    print('{} {} {}'.format(filename, label, text), file=sys.stderr)
167
168
def _get_files(root):
    """Return the sorted paths of all files found under `root`.

    The directory tree is walked recursively; each path is the walked
    directory joined with the file name.
    """
    found = [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(root)
        for name in names
    ]
    found.sort()

    return found
177
178
def _get_toc_ids(path):
    """Return the set of section IDs declared in the TOC file at `path`.

    Every `id: <value>` occurrence contributes one ID (the rest of the line
    after the colon). Returns None, after printing an error, when the same
    ID appears more than once.
    """
    id_pattern = re.compile(r'id\s*:\s*(.+)$', flags=re.M)

    with open(path) as f:
        # `(.+)$` runs to the end of the line, so it also captures trailing
        # spaces, tabs or a carriage return. Strip them; otherwise such an
        # ID would never match a link target and would escape the
        # duplicate check below.
        orig_ids = [raw.strip() for raw in id_pattern.findall(f.read())]

    ids = set(orig_ids)

    # A set smaller than the list means at least one ID was repeated.
    if len(ids) != len(orig_ids):
        _perror(path, 'Duplicate IDs')
        return None

    return ids
192
193
# An `id: <value>` line; the value is limited to letters, digits, `_` and `-`.
_id_re = re.compile(r'^\s*id:\s*([a-zA-Z0-9_-]+)\s*$', flags=re.M)


def _get_sid_from_file(path, c):
    """Return the section ID declared in the file contents `c`.

    The first `id:` line found wins. When there is none, an error is
    printed for `path` and None is returned.
    """
    found = _id_re.search(c)

    if found is None:
        _perror(path, 'No ID found')
        return None

    return found.group(1)
205
206
# Markdown inline link `[text](target)`; captures the target.
_ilink_re = re.compile(r'\[[^\]]+\]\(([^)]+)\)', flags=re.M)
# Opening HTML `<a ...>` tag, with its attributes.
_elink_re = re.compile(r'<a(?:\s+[^>]+|\s*)>')
# Value of the `href` / `class` attribute inside such a tag.
_href_re = re.compile(r'href="([^"]+)"')
_classes_re = re.compile(r'class="([^"]+)"')


def _register_section_info(registry, toc_ids, path, c):
    """Extract the links of one contents file and register its section.

    Parameters:
        registry: object receiving the result through
            `register_section_info(sid, section_info)`.
        toc_ids: container of known section IDs; internal link targets
            must be in it.
        path: path of the contents file (used for diagnostics and stored
            in the section info).
        c: text contents of the file.

    Returns False when the file has no section ID (nothing is registered
    in that case), when an `<a>` tag has no `href` attribute, or when an
    internal link is malformed or dead; True otherwise. Apart from the
    missing-ID case, the section is registered even when errors were found.
    """
    sid = _get_sid_from_file(path, c)

    if not sid:
        return False

    ret = True
    ilinks = _ilink_re.findall(c)
    elinks = _elink_re.findall(c)
    section_info = _SectionInfo(path)

    for link in elinks:
        href_match = _href_re.search(link)
        classes_match = _classes_re.search(link)

        if classes_match is None:
            _pwarn(path, 'External link has no "ext" class: "{}"'.format(link))
            classes = []
        else:
            # Split on any whitespace run so that doubled spaces do not
            # produce empty class names.
            classes = classes_match.group(1).split()

        if 'int' in classes and 'ext' in classes:
            _pwarn(path, 'External link has both "ext" and "int" classes: "{}"'.format(link))
        elif 'int' not in classes and 'ext' not in classes:
            _pwarn(path, 'External link has no "ext" or "int" class: "{}"'.format(link))

        if href_match is None:
            _perror(path, 'External link with no "href" attribute: "{}"'.format(link))
            ret = False
            continue

        # Keep the match object and the extracted string under different
        # names: the warning below must format the string, and calling
        # `.group()` on it would raise AttributeError.
        href = href_match.group(1)

        if href.startswith('#') and 'int' not in classes:
            _pwarn(path, 'External link starts with #: "{}"'.format(href))

        if 'int' in classes:
            # `<a class="int">` tags are validated with the Markdown
            # internal links below.
            ilinks.append(href)
            continue

        section_info.add_out_link(_ExtLink(href))

    for link in ilinks:
        if not link.startswith('#doc-'):
            s = 'Internal link does not start with "#doc-": "{}"'.format(link)
            _perror(path, s)
            ret = False
            continue

        # Drop the "#doc-" prefix to get the target section ID.
        target_sid = link[5:]

        if target_sid not in toc_ids:
            _perror(path, 'Dead internal link: "{}"'.format(link))
            ret = False
        else:
            section_info.add_out_link(_IntLink(target_sid))

    registry.register_section_info(sid, section_info)

    return ret
272
273
def _docs2json(toc_ids, contents_files):
    """Process every contents file and print the resulting JSON.

    Each file in `contents_files` is read and registered; `toc_ids` is the
    collection of valid internal link targets. The JSON document is printed
    on standard output even when some files reported errors.

    Returns True only if every file was registered without error.
    """
    ret = True
    registry = _Registry()

    for path in contents_files:
        with open(path) as f:
            c = f.read()

        # Every file is processed regardless of earlier failures, so that
        # all diagnostics are reported in a single run.
        if not _register_section_info(registry, toc_ids, path, c):
            ret = False

    print(registry.to_json())

    return ret
289
290
def _check_non_md(files):
    """Return True when every path in `files` ends with ".md".

    An error is printed for each offending path; all of them are reported,
    not only the first.
    """
    offenders = [name for name in files if not name.endswith('.md')]

    for name in offenders:
        _perror(name, 'Wrong, non-Markdown file: "{}"'.format(name))

    return not offenders
300
301
def docs2json():
    """Run the whole conversion and return True on success.

    Steps, each aborting with False on failure: read the section IDs from
    the TOC file, collect the contents files and check that all of them are
    Markdown files, then process them and print the JSON output.
    """
    toc_ids = _get_toc_ids(_TOC_PATH)

    if toc_ids is None:
        return False

    contents_files = _get_files(_CONTENTS_ROOT_PATH)

    # `and` short-circuits: the files are processed only when the Markdown
    # check passed.
    return bool(_check_non_md(contents_files) and
                _docs2json(toc_ids, contents_files))


if __name__ == '__main__':
    succeeded = docs2json()
    # Exit status 0 on success, 1 on any failure.
    sys.exit(0 if succeeded else 1)
# This page took 0.03559 seconds and 4 git commands to generate.