docs2json: no errors/warnings with simple anchors
[lttng-docs.git] / tools / docs2json.py
CommitLineData
60757f4b
PP
1#!/usr/bin/env python3
2
3# The MIT License (MIT)
4#
5# Copyright (c) 2015 Philippe Proulx <pproulx@efficios.com>
6#
7# Permission is hereby granted, free of charge, to any person obtaining a copy
8# of this software and associated documentation files (the "Software"), to deal
9# in the Software without restriction, including without limitation the rights
10# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11# copies of the Software, and to permit persons to whom the Software is
12# furnished to do so, subject to the following conditions:
13#
14# The above copyright notice and this permission notice shall be included in
15# all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23# THE SOFTWARE.
24
25import re
26import os
27import sys
28import json
29from termcolor import colored
30
31
# Path of the table-of-contents file from which section IDs are read.
_TOC_PATH = 'toc/docs.yml'

# Root directory that is walked to find the contents files.
_CONTENTS_ROOT_PATH = 'contents'
34
35
class _Link:
    """Common base class of the link kinds recorded for a section."""
38
39
class _IntLink(_Link):
    """Link to another section, identified by that section's ID.

    Instances of the same type carrying the same section ID compare and
    hash equal, so duplicates collapse when stored in a set.
    """

    def __init__(self, section):
        self._section = section

    @property
    def section(self):
        """Section ID this link refers to."""
        return self._section

    def __eq__(self, other):
        # The exact types must match before the section IDs are compared.
        return type(self) is type(other) and self._section == other._section

    def __hash__(self):
        return hash(self._section)

    def to_json(self):
        """Return a JSON-serializable dict describing this link."""
        return {'section': self._section}
61
62
class _ExtLink(_Link):
    """Link to an external resource, identified by its URL.

    Instances of the same type carrying the same URL compare and hash
    equal, so duplicates collapse when stored in a set.
    """

    def __init__(self, url):
        self._url = url

    @property
    def url(self):
        """URL this link refers to."""
        return self._url

    def __eq__(self, other):
        # The exact types must match before the URLs are compared.
        return type(self) is type(other) and self._url == other._url

    def __hash__(self):
        return hash(self._url)

    def to_json(self):
        """Return a JSON-serializable dict describing this link."""
        return {'url': self._url}
84
85
class _SectionInfo:
    """Path of a section's file plus the links going into and out of it.

    Both link collections are sets, so adding an equal link twice keeps
    a single entry.
    """

    def __init__(self, path):
        self._path = path
        self._in_links = set()
        self._out_links = set()

    @property
    def path(self):
        """Path given at construction time."""
        return self._path

    @property
    def in_links(self):
        """Set of links added with add_in_link()."""
        return self._in_links

    @property
    def out_links(self):
        """Set of links added with add_out_link()."""
        return self._out_links

    def add_in_link(self, link):
        """Record `link` as an incoming link."""
        self._in_links.add(link)

    def add_out_link(self, link):
        """Record `link` as an outgoing link."""
        self._out_links.add(link)

    def to_json(self):
        """Return a JSON-serializable dict for this section.

        Each link is converted with its own to_json() method.
        """
        return {
            'path': self.path,
            'in-links': [link.to_json() for link in self.in_links],
            'out-links': [link.to_json() for link in self.out_links],
        }
127
128
class _Registry:
    """Maps section IDs to their section info objects."""

    def __init__(self):
        self._section_infos = {}

    def register_section_info(self, sid, section_info):
        """Store `section_info` under the section ID `sid`."""
        self._section_infos[sid] = section_info

    def _resolve_in_links(self):
        """Mirror every internal outgoing link as an incoming link.

        For each registered section, every outgoing `_IntLink` adds an
        incoming `_IntLink` (pointing back to the source section) to the
        targeted section. The target must be registered: the lookup
        raises KeyError otherwise.
        """
        for source_sid, source_info in self._section_infos.items():
            for link in source_info.out_links:
                # Only internal links have a section to link back from.
                if type(link) is _IntLink:
                    target_info = self._section_infos[link.section]
                    target_info.add_in_link(_IntLink(source_sid))

    def to_json(self):
        """Return the whole registry serialized as a JSON string.

        Incoming links are resolved first, then each section is
        converted with its own to_json() method.
        """
        self._resolve_in_links()

        return json.dumps({
            sid: section_info.to_json()
            for sid, section_info in self._section_infos.items()
        })
155
156
def _perror(filename, msg):
    """Print a red "Error:" line for `filename` to the standard error."""
    label = colored('Error:', 'red')
    text = colored(msg, 'red', attrs=['bold'])
    print('{} {} {}'.format(filename, label, text), file=sys.stderr)
161
162
def _pwarn(filename, msg):
    """Print a yellow "Warning:" line for `filename` to the standard error."""
    label = colored('Warning:', 'yellow')
    text = colored(msg, 'yellow', attrs=['bold'])
    print('{} {} {}'.format(filename, label, text), file=sys.stderr)
167
168
def _get_files(root):
    """Return the sorted paths of all files found under `root`.

    The tree is walked recursively; each path is the containing
    directory path joined with the file name.
    """
    found = [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(root)
        for name in names
    ]
    found.sort()

    return found
177
178
def _get_toc_ids(path):
    """Collect the section IDs declared in the TOC file at `path`.

    Every `id: <value>` occurrence in the file counts as one ID; the
    value runs to the end of its line and is stripped of surrounding
    whitespace.

    Returns the set of IDs, or None (after reporting an error) when the
    same ID is declared more than once.
    """
    p = re.compile(r'id\s*:\s*(.+)$', flags=re.M)

    # Read as UTF-8 explicitly so the result does not depend on the
    # locale of the machine running the script.
    with open(path, encoding='utf-8') as f:
        # `(.+)$` captures everything up to the end of the line, trailing
        # blanks included. Without stripping, `id: foo ` would yield
        # 'foo ', which never matches a `#doc-foo` link target.
        orig_ids = [raw_id.strip() for raw_id in p.findall(f.read())]

    ids = set(orig_ids)

    # The set is smaller than the list exactly when an ID is repeated.
    if len(ids) != len(orig_ids):
        _perror(path, 'Duplicate IDs')
        return None

    return ids
192
193
# Matches a whole line of the form `id: <sid>`, where <sid> is made of
# ASCII letters, digits, underscores, and dashes; group 1 is the ID.
_id_re = re.compile(r'^\s*id:\s*([a-zA-Z0-9_-]+)\s*$', flags=re.MULTILINE)


def _get_sid_from_file(path, c):
    """Return the section ID declared in the contents `c` of file `path`.

    The first `id: <sid>` line found in `c` wins. When there is none, an
    error is reported for `path` and None is returned.
    """
    match = _id_re.search(c)

    if match:
        return match.group(1)

    _perror(path, 'No ID found')
    return None
205
206
# Markdown link `[text](target)`; group 1 is the target.
_ilink_re = re.compile(r'\[[^\]]+\]\(([^)]+)\)', flags=re.MULTILINE)

# Opening HTML `<a>` tag, with or without attributes.
_elink_re = re.compile(r'<a(?:\s+[^>]+|\s*)>')

# Double-quoted attribute values looked up inside an `<a>` tag; group 1
# is the value.
_name_re = re.compile(r'name="([^"]+)"')
_href_re = re.compile(r'href="([^"]+)"')
_classes_re = re.compile(r'class="([^"]+)"')
212
213
def _register_section_info(registry, toc_ids, path, c):
    """Extract the links of one contents file and register its section.

    `c` is the text of the file at `path`. Its section ID is looked up
    first; without one, nothing is registered and False is returned.

    HTML `<a>` tags are checked for their `class`, `href`, and `name`
    attributes: tags with the `int` class feed their `href` to the
    internal links, other tags with an `href` become external outgoing
    links, and tags with only a `name` are plain anchors and are
    skipped. Markdown links and `int` tags are then validated as
    internal links: they must start with `#doc-` and target an ID found
    in `toc_ids`.

    The section info is registered in `registry` even when some links
    are invalid. Returns True when no error was reported, False
    otherwise (warnings do not affect the result).
    """
    sid = _get_sid_from_file(path, c)

    if not sid:
        return False

    ok = True
    internal_targets = _ilink_re.findall(c)
    info = _SectionInfo(path)

    for tag in _elink_re.findall(c):
        href_match = _href_re.search(tag)
        name_match = _name_re.search(tag)
        class_match = _classes_re.search(tag)

        if name_match and not href_match:
            # simple anchor
            continue

        if class_match is None:
            _pwarn(path, 'External link has no "ext" class: "{}"'.format(tag))
            class_names = []
        else:
            class_names = class_match.group(1).split(' ')

        is_int = 'int' in class_names
        is_ext = 'ext' in class_names

        if is_int and is_ext:
            _pwarn(path, 'External link has both "ext" and "int" classes: "{}"'.format(tag))
        elif not is_int and not is_ext:
            _pwarn(path, 'External link has no "ext" or "int" class: "{}"'.format(tag))

        if not href_match:
            if not name_match:
                _perror(path, 'External link with no "href" or "name" attribute: "{}"'.format(tag))
                ok = False

            continue

        url = href_match.group(1)

        if url.startswith('#') and not is_int:
            _pwarn(path, 'External link starts with #: "{}"'.format(url))

        if is_int:
            # Validated below, along with the Markdown links.
            internal_targets.append(url)
        else:
            info.add_out_link(_ExtLink(url))

    for target in internal_targets:
        if not target.startswith('#doc-'):
            message = 'Internal link does not start with "#doc-": "{}"'.format(target)
            _perror(path, message)
            ok = False
            continue

        # Drop the 5-character "#doc-" prefix to get the section ID.
        target_sid = target[5:]

        if target_sid in toc_ids:
            info.add_out_link(_IntLink(target_sid))
        else:
            _perror(path, 'Dead internal link: "{}"'.format(target))
            ok = False

    registry.register_section_info(sid, info)

    return ok
278
279
def _docs2json(toc_ids, contents_files):
    """Register every contents file and print the registry as JSON.

    Each path of `contents_files` is read and passed, with `toc_ids`,
    to _register_section_info(). All files are processed even after a
    failure so that every problem gets reported. The JSON registry is
    then printed to the standard output, whether or not errors occurred.

    Returns True when every file was registered without error, False
    otherwise.
    """
    ret = True
    registry = _Registry()

    for path in contents_files:
        # Read as UTF-8 explicitly so the result does not depend on the
        # locale of the machine running the script.
        with open(path, encoding='utf-8') as f:
            c = f.read()

        if not _register_section_info(registry, toc_ids, path, c):
            ret = False

    print(registry.to_json())

    return ret
295
296
def _check_non_md(files):
    """Report every path of `files` that does not end with `.md`.

    Returns True when all paths end with `.md` (including when `files`
    is empty), False when at least one error was reported.
    """
    all_markdown = True

    for path in files:
        if path.endswith('.md'):
            continue

        _perror(path, 'Wrong, non-Markdown file: "{}"'.format(path))
        all_markdown = False

    return all_markdown
306
307
def docs2json():
    """Check the documentation sources and print their link registry.

    Steps, each of which stops the process on failure:

    1. Read the section IDs of the TOC file (`_TOC_PATH`).
    2. List the files under `_CONTENTS_ROOT_PATH` and check that they
       are all Markdown files.
    3. Register the sections and print the JSON registry.

    Returns True on success, False otherwise.
    """
    toc_ids = _get_toc_ids(_TOC_PATH)

    if toc_ids is None:
        return False

    contents_files = _get_files(_CONTENTS_ROOT_PATH)
    files_ok = _check_non_md(contents_files)

    # The registration step only runs when the file check passed.
    return bool(files_ok and _docs2json(toc_ids, contents_files))
323
324
if __name__ == '__main__':
    # Exit status: 0 when docs2json() succeeds, 1 otherwise.
    exit_status = 0 if docs2json() else 1
    sys.exit(exit_status)
This page took 0.032759 seconds and 4 git commands to generate.