You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

562 lines
19 KiB

  1. /*
  2. * This file exposes PyAST_Validate interface to check the integrity
  3. * of the given abstract syntax tree (potentially constructed manually).
  4. */
  5. #include "Python.h"
  6. #include "Python-ast.h"
  7. #include "ast.h"
  8. #include <assert.h>
  9. static int validate_stmts(asdl_stmt_seq *);
  10. static int validate_exprs(asdl_expr_seq*, expr_context_ty, int);
  11. static int _validate_nonempty_seq(asdl_seq *, const char *, const char *);
  12. static int validate_stmt(stmt_ty);
  13. static int validate_expr(expr_ty, expr_context_ty);
  14. static int
  15. validate_name(PyObject *name)
  16. {
  17. assert(PyUnicode_Check(name));
  18. static const char * const forbidden[] = {
  19. "None",
  20. "True",
  21. "False",
  22. NULL
  23. };
  24. for (int i = 0; forbidden[i] != NULL; i++) {
  25. if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
  26. PyErr_Format(PyExc_ValueError, "Name node can't be used with '%s' constant", forbidden[i]);
  27. return 0;
  28. }
  29. }
  30. return 1;
  31. }
  32. static int
  33. validate_comprehension(asdl_comprehension_seq *gens)
  34. {
  35. Py_ssize_t i;
  36. if (!asdl_seq_LEN(gens)) {
  37. PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
  38. return 0;
  39. }
  40. for (i = 0; i < asdl_seq_LEN(gens); i++) {
  41. comprehension_ty comp = asdl_seq_GET(gens, i);
  42. if (!validate_expr(comp->target, Store) ||
  43. !validate_expr(comp->iter, Load) ||
  44. !validate_exprs(comp->ifs, Load, 0))
  45. return 0;
  46. }
  47. return 1;
  48. }
  49. static int
  50. validate_keywords(asdl_keyword_seq *keywords)
  51. {
  52. Py_ssize_t i;
  53. for (i = 0; i < asdl_seq_LEN(keywords); i++)
  54. if (!validate_expr((asdl_seq_GET(keywords, i))->value, Load))
  55. return 0;
  56. return 1;
  57. }
  58. static int
  59. validate_args(asdl_arg_seq *args)
  60. {
  61. Py_ssize_t i;
  62. for (i = 0; i < asdl_seq_LEN(args); i++) {
  63. arg_ty arg = asdl_seq_GET(args, i);
  64. if (arg->annotation && !validate_expr(arg->annotation, Load))
  65. return 0;
  66. }
  67. return 1;
  68. }
  69. static const char *
  70. expr_context_name(expr_context_ty ctx)
  71. {
  72. switch (ctx) {
  73. case Load:
  74. return "Load";
  75. case Store:
  76. return "Store";
  77. case Del:
  78. return "Del";
  79. default:
  80. Py_UNREACHABLE();
  81. }
  82. }
  83. static int
  84. validate_arguments(arguments_ty args)
  85. {
  86. if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
  87. return 0;
  88. }
  89. if (args->vararg && args->vararg->annotation
  90. && !validate_expr(args->vararg->annotation, Load)) {
  91. return 0;
  92. }
  93. if (!validate_args(args->kwonlyargs))
  94. return 0;
  95. if (args->kwarg && args->kwarg->annotation
  96. && !validate_expr(args->kwarg->annotation, Load)) {
  97. return 0;
  98. }
  99. if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
  100. PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
  101. return 0;
  102. }
  103. if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
  104. PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
  105. "kw_defaults on arguments");
  106. return 0;
  107. }
  108. return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
  109. }
  110. static int
  111. validate_constant(PyObject *value)
  112. {
  113. if (value == Py_None || value == Py_Ellipsis)
  114. return 1;
  115. if (PyLong_CheckExact(value)
  116. || PyFloat_CheckExact(value)
  117. || PyComplex_CheckExact(value)
  118. || PyBool_Check(value)
  119. || PyUnicode_CheckExact(value)
  120. || PyBytes_CheckExact(value))
  121. return 1;
  122. if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
  123. PyObject *it;
  124. it = PyObject_GetIter(value);
  125. if (it == NULL)
  126. return 0;
  127. while (1) {
  128. PyObject *item = PyIter_Next(it);
  129. if (item == NULL) {
  130. if (PyErr_Occurred()) {
  131. Py_DECREF(it);
  132. return 0;
  133. }
  134. break;
  135. }
  136. if (!validate_constant(item)) {
  137. Py_DECREF(it);
  138. Py_DECREF(item);
  139. return 0;
  140. }
  141. Py_DECREF(item);
  142. }
  143. Py_DECREF(it);
  144. return 1;
  145. }
  146. if (!PyErr_Occurred()) {
  147. PyErr_Format(PyExc_TypeError,
  148. "got an invalid type in Constant: %s",
  149. _PyType_Name(Py_TYPE(value)));
  150. }
  151. return 0;
  152. }
  153. static int
  154. validate_expr(expr_ty exp, expr_context_ty ctx)
  155. {
  156. int check_ctx = 1;
  157. expr_context_ty actual_ctx;
  158. /* First check expression context. */
  159. switch (exp->kind) {
  160. case Attribute_kind:
  161. actual_ctx = exp->v.Attribute.ctx;
  162. break;
  163. case Subscript_kind:
  164. actual_ctx = exp->v.Subscript.ctx;
  165. break;
  166. case Starred_kind:
  167. actual_ctx = exp->v.Starred.ctx;
  168. break;
  169. case Name_kind:
  170. if (!validate_name(exp->v.Name.id)) {
  171. return 0;
  172. }
  173. actual_ctx = exp->v.Name.ctx;
  174. break;
  175. case List_kind:
  176. actual_ctx = exp->v.List.ctx;
  177. break;
  178. case Tuple_kind:
  179. actual_ctx = exp->v.Tuple.ctx;
  180. break;
  181. default:
  182. if (ctx != Load) {
  183. PyErr_Format(PyExc_ValueError, "expression which can't be "
  184. "assigned to in %s context", expr_context_name(ctx));
  185. return 0;
  186. }
  187. check_ctx = 0;
  188. /* set actual_ctx to prevent gcc warning */
  189. actual_ctx = 0;
  190. }
  191. if (check_ctx && actual_ctx != ctx) {
  192. PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
  193. expr_context_name(ctx), expr_context_name(actual_ctx));
  194. return 0;
  195. }
  196. /* Now validate expression. */
  197. switch (exp->kind) {
  198. case BoolOp_kind:
  199. if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
  200. PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
  201. return 0;
  202. }
  203. return validate_exprs(exp->v.BoolOp.values, Load, 0);
  204. case BinOp_kind:
  205. return validate_expr(exp->v.BinOp.left, Load) &&
  206. validate_expr(exp->v.BinOp.right, Load);
  207. case UnaryOp_kind:
  208. return validate_expr(exp->v.UnaryOp.operand, Load);
  209. case Lambda_kind:
  210. return validate_arguments(exp->v.Lambda.args) &&
  211. validate_expr(exp->v.Lambda.body, Load);
  212. case IfExp_kind:
  213. return validate_expr(exp->v.IfExp.test, Load) &&
  214. validate_expr(exp->v.IfExp.body, Load) &&
  215. validate_expr(exp->v.IfExp.orelse, Load);
  216. case Dict_kind:
  217. if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
  218. PyErr_SetString(PyExc_ValueError,
  219. "Dict doesn't have the same number of keys as values");
  220. return 0;
  221. }
  222. /* null_ok=1 for keys expressions to allow dict unpacking to work in
  223. dict literals, i.e. ``{**{a:b}}`` */
  224. return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
  225. validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
  226. case Set_kind:
  227. return validate_exprs(exp->v.Set.elts, Load, 0);
  228. #define COMP(NAME) \
  229. case NAME ## _kind: \
  230. return validate_comprehension(exp->v.NAME.generators) && \
  231. validate_expr(exp->v.NAME.elt, Load);
  232. COMP(ListComp)
  233. COMP(SetComp)
  234. COMP(GeneratorExp)
  235. #undef COMP
  236. case DictComp_kind:
  237. return validate_comprehension(exp->v.DictComp.generators) &&
  238. validate_expr(exp->v.DictComp.key, Load) &&
  239. validate_expr(exp->v.DictComp.value, Load);
  240. case Yield_kind:
  241. return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
  242. case YieldFrom_kind:
  243. return validate_expr(exp->v.YieldFrom.value, Load);
  244. case Await_kind:
  245. return validate_expr(exp->v.Await.value, Load);
  246. case Compare_kind:
  247. if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
  248. PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
  249. return 0;
  250. }
  251. if (asdl_seq_LEN(exp->v.Compare.comparators) !=
  252. asdl_seq_LEN(exp->v.Compare.ops)) {
  253. PyErr_SetString(PyExc_ValueError, "Compare has a different number "
  254. "of comparators and operands");
  255. return 0;
  256. }
  257. return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
  258. validate_expr(exp->v.Compare.left, Load);
  259. case Call_kind:
  260. return validate_expr(exp->v.Call.func, Load) &&
  261. validate_exprs(exp->v.Call.args, Load, 0) &&
  262. validate_keywords(exp->v.Call.keywords);
  263. case Constant_kind:
  264. if (!validate_constant(exp->v.Constant.value)) {
  265. return 0;
  266. }
  267. return 1;
  268. case JoinedStr_kind:
  269. return validate_exprs(exp->v.JoinedStr.values, Load, 0);
  270. case FormattedValue_kind:
  271. if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
  272. return 0;
  273. if (exp->v.FormattedValue.format_spec)
  274. return validate_expr(exp->v.FormattedValue.format_spec, Load);
  275. return 1;
  276. case Attribute_kind:
  277. return validate_expr(exp->v.Attribute.value, Load);
  278. case Subscript_kind:
  279. return validate_expr(exp->v.Subscript.slice, Load) &&
  280. validate_expr(exp->v.Subscript.value, Load);
  281. case Starred_kind:
  282. return validate_expr(exp->v.Starred.value, ctx);
  283. case Slice_kind:
  284. return (!exp->v.Slice.lower || validate_expr(exp->v.Slice.lower, Load)) &&
  285. (!exp->v.Slice.upper || validate_expr(exp->v.Slice.upper, Load)) &&
  286. (!exp->v.Slice.step || validate_expr(exp->v.Slice.step, Load));
  287. case List_kind:
  288. return validate_exprs(exp->v.List.elts, ctx, 0);
  289. case Tuple_kind:
  290. return validate_exprs(exp->v.Tuple.elts, ctx, 0);
  291. case NamedExpr_kind:
  292. return validate_expr(exp->v.NamedExpr.value, Load);
  293. /* This last case doesn't have any checking. */
  294. case Name_kind:
  295. return 1;
  296. }
  297. PyErr_SetString(PyExc_SystemError, "unexpected expression");
  298. return 0;
  299. }
  300. static int
  301. _validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
  302. {
  303. if (asdl_seq_LEN(seq))
  304. return 1;
  305. PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
  306. return 0;
  307. }
  308. #define validate_nonempty_seq(seq, what, owner) _validate_nonempty_seq((asdl_seq*)seq, what, owner)
  309. static int
  310. validate_assignlist(asdl_expr_seq *targets, expr_context_ty ctx)
  311. {
  312. return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
  313. validate_exprs(targets, ctx, 0);
  314. }
  315. static int
  316. validate_body(asdl_stmt_seq *body, const char *owner)
  317. {
  318. return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
  319. }
  320. static int
  321. validate_stmt(stmt_ty stmt)
  322. {
  323. Py_ssize_t i;
  324. switch (stmt->kind) {
  325. case FunctionDef_kind:
  326. return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
  327. validate_arguments(stmt->v.FunctionDef.args) &&
  328. validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
  329. (!stmt->v.FunctionDef.returns ||
  330. validate_expr(stmt->v.FunctionDef.returns, Load));
  331. case ClassDef_kind:
  332. return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
  333. validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
  334. validate_keywords(stmt->v.ClassDef.keywords) &&
  335. validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
  336. case Return_kind:
  337. return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
  338. case Delete_kind:
  339. return validate_assignlist(stmt->v.Delete.targets, Del);
  340. case Assign_kind:
  341. return validate_assignlist(stmt->v.Assign.targets, Store) &&
  342. validate_expr(stmt->v.Assign.value, Load);
  343. case AugAssign_kind:
  344. return validate_expr(stmt->v.AugAssign.target, Store) &&
  345. validate_expr(stmt->v.AugAssign.value, Load);
  346. case AnnAssign_kind:
  347. if (stmt->v.AnnAssign.target->kind != Name_kind &&
  348. stmt->v.AnnAssign.simple) {
  349. PyErr_SetString(PyExc_TypeError,
  350. "AnnAssign with simple non-Name target");
  351. return 0;
  352. }
  353. return validate_expr(stmt->v.AnnAssign.target, Store) &&
  354. (!stmt->v.AnnAssign.value ||
  355. validate_expr(stmt->v.AnnAssign.value, Load)) &&
  356. validate_expr(stmt->v.AnnAssign.annotation, Load);
  357. case For_kind:
  358. return validate_expr(stmt->v.For.target, Store) &&
  359. validate_expr(stmt->v.For.iter, Load) &&
  360. validate_body(stmt->v.For.body, "For") &&
  361. validate_stmts(stmt->v.For.orelse);
  362. case AsyncFor_kind:
  363. return validate_expr(stmt->v.AsyncFor.target, Store) &&
  364. validate_expr(stmt->v.AsyncFor.iter, Load) &&
  365. validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
  366. validate_stmts(stmt->v.AsyncFor.orelse);
  367. case While_kind:
  368. return validate_expr(stmt->v.While.test, Load) &&
  369. validate_body(stmt->v.While.body, "While") &&
  370. validate_stmts(stmt->v.While.orelse);
  371. case If_kind:
  372. return validate_expr(stmt->v.If.test, Load) &&
  373. validate_body(stmt->v.If.body, "If") &&
  374. validate_stmts(stmt->v.If.orelse);
  375. case With_kind:
  376. if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
  377. return 0;
  378. for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
  379. withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
  380. if (!validate_expr(item->context_expr, Load) ||
  381. (item->optional_vars && !validate_expr(item->optional_vars, Store)))
  382. return 0;
  383. }
  384. return validate_body(stmt->v.With.body, "With");
  385. case AsyncWith_kind:
  386. if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
  387. return 0;
  388. for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
  389. withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
  390. if (!validate_expr(item->context_expr, Load) ||
  391. (item->optional_vars && !validate_expr(item->optional_vars, Store)))
  392. return 0;
  393. }
  394. return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
  395. case Raise_kind:
  396. if (stmt->v.Raise.exc) {
  397. return validate_expr(stmt->v.Raise.exc, Load) &&
  398. (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
  399. }
  400. if (stmt->v.Raise.cause) {
  401. PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
  402. return 0;
  403. }
  404. return 1;
  405. case Try_kind:
  406. if (!validate_body(stmt->v.Try.body, "Try"))
  407. return 0;
  408. if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
  409. !asdl_seq_LEN(stmt->v.Try.finalbody)) {
  410. PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
  411. return 0;
  412. }
  413. if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
  414. asdl_seq_LEN(stmt->v.Try.orelse)) {
  415. PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
  416. return 0;
  417. }
  418. for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
  419. excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
  420. if ((handler->v.ExceptHandler.type &&
  421. !validate_expr(handler->v.ExceptHandler.type, Load)) ||
  422. !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
  423. return 0;
  424. }
  425. return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
  426. validate_stmts(stmt->v.Try.finalbody)) &&
  427. (!asdl_seq_LEN(stmt->v.Try.orelse) ||
  428. validate_stmts(stmt->v.Try.orelse));
  429. case Assert_kind:
  430. return validate_expr(stmt->v.Assert.test, Load) &&
  431. (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
  432. case Import_kind:
  433. return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
  434. case ImportFrom_kind:
  435. if (stmt->v.ImportFrom.level < 0) {
  436. PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
  437. return 0;
  438. }
  439. return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
  440. case Global_kind:
  441. return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
  442. case Nonlocal_kind:
  443. return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
  444. case Expr_kind:
  445. return validate_expr(stmt->v.Expr.value, Load);
  446. case AsyncFunctionDef_kind:
  447. return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
  448. validate_arguments(stmt->v.AsyncFunctionDef.args) &&
  449. validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
  450. (!stmt->v.AsyncFunctionDef.returns ||
  451. validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
  452. case Pass_kind:
  453. case Break_kind:
  454. case Continue_kind:
  455. return 1;
  456. default:
  457. PyErr_SetString(PyExc_SystemError, "unexpected statement");
  458. return 0;
  459. }
  460. }
  461. static int
  462. validate_stmts(asdl_stmt_seq *seq)
  463. {
  464. Py_ssize_t i;
  465. for (i = 0; i < asdl_seq_LEN(seq); i++) {
  466. stmt_ty stmt = asdl_seq_GET(seq, i);
  467. if (stmt) {
  468. if (!validate_stmt(stmt))
  469. return 0;
  470. }
  471. else {
  472. PyErr_SetString(PyExc_ValueError,
  473. "None disallowed in statement list");
  474. return 0;
  475. }
  476. }
  477. return 1;
  478. }
  479. static int
  480. validate_exprs(asdl_expr_seq *exprs, expr_context_ty ctx, int null_ok)
  481. {
  482. Py_ssize_t i;
  483. for (i = 0; i < asdl_seq_LEN(exprs); i++) {
  484. expr_ty expr = asdl_seq_GET(exprs, i);
  485. if (expr) {
  486. if (!validate_expr(expr, ctx))
  487. return 0;
  488. }
  489. else if (!null_ok) {
  490. PyErr_SetString(PyExc_ValueError,
  491. "None disallowed in expression list");
  492. return 0;
  493. }
  494. }
  495. return 1;
  496. }
  497. int
  498. PyAST_Validate(mod_ty mod)
  499. {
  500. int res = 0;
  501. switch (mod->kind) {
  502. case Module_kind:
  503. res = validate_stmts(mod->v.Module.body);
  504. break;
  505. case Interactive_kind:
  506. res = validate_stmts(mod->v.Interactive.body);
  507. break;
  508. case Expression_kind:
  509. res = validate_expr(mod->v.Expression.body, Load);
  510. break;
  511. default:
  512. PyErr_SetString(PyExc_SystemError, "impossible module node");
  513. res = 0;
  514. break;
  515. }
  516. return res;
  517. }
  518. PyObject *
  519. _PyAST_GetDocString(asdl_stmt_seq *body)
  520. {
  521. if (!asdl_seq_LEN(body)) {
  522. return NULL;
  523. }
  524. stmt_ty st = asdl_seq_GET(body, 0);
  525. if (st->kind != Expr_kind) {
  526. return NULL;
  527. }
  528. expr_ty e = st->v.Expr.value;
  529. if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
  530. return e->v.Constant.value;
  531. }
  532. return NULL;
  533. }