You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

590 lines
20 KiB

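  /*
   * This file exposes the _PyAST_Validate interface to check the integrity
   * of the given abstract syntax tree (potentially constructed manually).
   * It also provides _PyAST_GetDocString, which returns the docstring
   * constant found at the start of a statement list, if there is one.
   */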
  5. #include "Python.h"
  6. #include "pycore_ast.h" // asdl_stmt_seq
  7. #include <assert.h>
  8. static int validate_stmts(asdl_stmt_seq *);
  9. static int validate_exprs(asdl_expr_seq*, expr_context_ty, int);
  10. static int _validate_nonempty_seq(asdl_seq *, const char *, const char *);
  11. static int validate_stmt(stmt_ty);
  12. static int validate_expr(expr_ty, expr_context_ty);
  13. static int
  14. validate_name(PyObject *name)
  15. {
  16. assert(PyUnicode_Check(name));
  17. static const char * const forbidden[] = {
  18. "None",
  19. "True",
  20. "False",
  21. NULL
  22. };
  23. for (int i = 0; forbidden[i] != NULL; i++) {
  24. if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
  25. PyErr_Format(PyExc_ValueError, "Name node can't be used with '%s' constant", forbidden[i]);
  26. return 0;
  27. }
  28. }
  29. return 1;
  30. }
  31. static int
  32. validate_comprehension(asdl_comprehension_seq *gens)
  33. {
  34. Py_ssize_t i;
  35. if (!asdl_seq_LEN(gens)) {
  36. PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
  37. return 0;
  38. }
  39. for (i = 0; i < asdl_seq_LEN(gens); i++) {
  40. comprehension_ty comp = asdl_seq_GET(gens, i);
  41. if (!validate_expr(comp->target, Store) ||
  42. !validate_expr(comp->iter, Load) ||
  43. !validate_exprs(comp->ifs, Load, 0))
  44. return 0;
  45. }
  46. return 1;
  47. }
  48. static int
  49. validate_keywords(asdl_keyword_seq *keywords)
  50. {
  51. Py_ssize_t i;
  52. for (i = 0; i < asdl_seq_LEN(keywords); i++)
  53. if (!validate_expr((asdl_seq_GET(keywords, i))->value, Load))
  54. return 0;
  55. return 1;
  56. }
  57. static int
  58. validate_args(asdl_arg_seq *args)
  59. {
  60. Py_ssize_t i;
  61. for (i = 0; i < asdl_seq_LEN(args); i++) {
  62. arg_ty arg = asdl_seq_GET(args, i);
  63. if (arg->annotation && !validate_expr(arg->annotation, Load))
  64. return 0;
  65. }
  66. return 1;
  67. }
  68. static const char *
  69. expr_context_name(expr_context_ty ctx)
  70. {
  71. switch (ctx) {
  72. case Load:
  73. return "Load";
  74. case Store:
  75. return "Store";
  76. case Del:
  77. return "Del";
  78. default:
  79. Py_UNREACHABLE();
  80. }
  81. }
  82. static int
  83. validate_arguments(arguments_ty args)
  84. {
  85. if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
  86. return 0;
  87. }
  88. if (args->vararg && args->vararg->annotation
  89. && !validate_expr(args->vararg->annotation, Load)) {
  90. return 0;
  91. }
  92. if (!validate_args(args->kwonlyargs))
  93. return 0;
  94. if (args->kwarg && args->kwarg->annotation
  95. && !validate_expr(args->kwarg->annotation, Load)) {
  96. return 0;
  97. }
  98. if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
  99. PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
  100. return 0;
  101. }
  102. if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
  103. PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
  104. "kw_defaults on arguments");
  105. return 0;
  106. }
  107. return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
  108. }
  109. static int
  110. validate_constant(PyObject *value)
  111. {
  112. if (value == Py_None || value == Py_Ellipsis)
  113. return 1;
  114. if (PyLong_CheckExact(value)
  115. || PyFloat_CheckExact(value)
  116. || PyComplex_CheckExact(value)
  117. || PyBool_Check(value)
  118. || PyUnicode_CheckExact(value)
  119. || PyBytes_CheckExact(value))
  120. return 1;
  121. if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
  122. PyObject *it;
  123. it = PyObject_GetIter(value);
  124. if (it == NULL)
  125. return 0;
  126. while (1) {
  127. PyObject *item = PyIter_Next(it);
  128. if (item == NULL) {
  129. if (PyErr_Occurred()) {
  130. Py_DECREF(it);
  131. return 0;
  132. }
  133. break;
  134. }
  135. if (!validate_constant(item)) {
  136. Py_DECREF(it);
  137. Py_DECREF(item);
  138. return 0;
  139. }
  140. Py_DECREF(item);
  141. }
  142. Py_DECREF(it);
  143. return 1;
  144. }
  145. if (!PyErr_Occurred()) {
  146. PyErr_Format(PyExc_TypeError,
  147. "got an invalid type in Constant: %s",
  148. _PyType_Name(Py_TYPE(value)));
  149. }
  150. return 0;
  151. }
  152. static int
  153. validate_expr(expr_ty exp, expr_context_ty ctx)
  154. {
  155. int check_ctx = 1;
  156. expr_context_ty actual_ctx;
  157. /* First check expression context. */
  158. switch (exp->kind) {
  159. case Attribute_kind:
  160. actual_ctx = exp->v.Attribute.ctx;
  161. break;
  162. case Subscript_kind:
  163. actual_ctx = exp->v.Subscript.ctx;
  164. break;
  165. case Starred_kind:
  166. actual_ctx = exp->v.Starred.ctx;
  167. break;
  168. case Name_kind:
  169. if (!validate_name(exp->v.Name.id)) {
  170. return 0;
  171. }
  172. actual_ctx = exp->v.Name.ctx;
  173. break;
  174. case List_kind:
  175. actual_ctx = exp->v.List.ctx;
  176. break;
  177. case Tuple_kind:
  178. actual_ctx = exp->v.Tuple.ctx;
  179. break;
  180. default:
  181. if (ctx != Load) {
  182. PyErr_Format(PyExc_ValueError, "expression which can't be "
  183. "assigned to in %s context", expr_context_name(ctx));
  184. return 0;
  185. }
  186. check_ctx = 0;
  187. /* set actual_ctx to prevent gcc warning */
  188. actual_ctx = 0;
  189. }
  190. if (check_ctx && actual_ctx != ctx) {
  191. PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
  192. expr_context_name(ctx), expr_context_name(actual_ctx));
  193. return 0;
  194. }
  195. /* Now validate expression. */
  196. switch (exp->kind) {
  197. case BoolOp_kind:
  198. if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
  199. PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
  200. return 0;
  201. }
  202. return validate_exprs(exp->v.BoolOp.values, Load, 0);
  203. case BinOp_kind:
  204. return validate_expr(exp->v.BinOp.left, Load) &&
  205. validate_expr(exp->v.BinOp.right, Load);
  206. case UnaryOp_kind:
  207. return validate_expr(exp->v.UnaryOp.operand, Load);
  208. case Lambda_kind:
  209. return validate_arguments(exp->v.Lambda.args) &&
  210. validate_expr(exp->v.Lambda.body, Load);
  211. case IfExp_kind:
  212. return validate_expr(exp->v.IfExp.test, Load) &&
  213. validate_expr(exp->v.IfExp.body, Load) &&
  214. validate_expr(exp->v.IfExp.orelse, Load);
  215. case Dict_kind:
  216. if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
  217. PyErr_SetString(PyExc_ValueError,
  218. "Dict doesn't have the same number of keys as values");
  219. return 0;
  220. }
  221. /* null_ok=1 for keys expressions to allow dict unpacking to work in
  222. dict literals, i.e. ``{**{a:b}}`` */
  223. return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
  224. validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
  225. case Set_kind:
  226. return validate_exprs(exp->v.Set.elts, Load, 0);
  227. #define COMP(NAME) \
  228. case NAME ## _kind: \
  229. return validate_comprehension(exp->v.NAME.generators) && \
  230. validate_expr(exp->v.NAME.elt, Load);
  231. COMP(ListComp)
  232. COMP(SetComp)
  233. COMP(GeneratorExp)
  234. #undef COMP
  235. case DictComp_kind:
  236. return validate_comprehension(exp->v.DictComp.generators) &&
  237. validate_expr(exp->v.DictComp.key, Load) &&
  238. validate_expr(exp->v.DictComp.value, Load);
  239. case Yield_kind:
  240. return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
  241. case YieldFrom_kind:
  242. return validate_expr(exp->v.YieldFrom.value, Load);
  243. case Await_kind:
  244. return validate_expr(exp->v.Await.value, Load);
  245. case Compare_kind:
  246. if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
  247. PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
  248. return 0;
  249. }
  250. if (asdl_seq_LEN(exp->v.Compare.comparators) !=
  251. asdl_seq_LEN(exp->v.Compare.ops)) {
  252. PyErr_SetString(PyExc_ValueError, "Compare has a different number "
  253. "of comparators and operands");
  254. return 0;
  255. }
  256. return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
  257. validate_expr(exp->v.Compare.left, Load);
  258. case Call_kind:
  259. return validate_expr(exp->v.Call.func, Load) &&
  260. validate_exprs(exp->v.Call.args, Load, 0) &&
  261. validate_keywords(exp->v.Call.keywords);
  262. case Constant_kind:
  263. if (!validate_constant(exp->v.Constant.value)) {
  264. return 0;
  265. }
  266. return 1;
  267. case JoinedStr_kind:
  268. return validate_exprs(exp->v.JoinedStr.values, Load, 0);
  269. case FormattedValue_kind:
  270. if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
  271. return 0;
  272. if (exp->v.FormattedValue.format_spec)
  273. return validate_expr(exp->v.FormattedValue.format_spec, Load);
  274. return 1;
  275. case Attribute_kind:
  276. return validate_expr(exp->v.Attribute.value, Load);
  277. case Subscript_kind:
  278. return validate_expr(exp->v.Subscript.slice, Load) &&
  279. validate_expr(exp->v.Subscript.value, Load);
  280. case Starred_kind:
  281. return validate_expr(exp->v.Starred.value, ctx);
  282. case Slice_kind:
  283. return (!exp->v.Slice.lower || validate_expr(exp->v.Slice.lower, Load)) &&
  284. (!exp->v.Slice.upper || validate_expr(exp->v.Slice.upper, Load)) &&
  285. (!exp->v.Slice.step || validate_expr(exp->v.Slice.step, Load));
  286. case List_kind:
  287. return validate_exprs(exp->v.List.elts, ctx, 0);
  288. case Tuple_kind:
  289. return validate_exprs(exp->v.Tuple.elts, ctx, 0);
  290. case NamedExpr_kind:
  291. return validate_expr(exp->v.NamedExpr.value, Load);
  292. case MatchAs_kind:
  293. PyErr_SetString(PyExc_ValueError,
  294. "MatchAs is only valid in match_case patterns");
  295. return 0;
  296. case MatchOr_kind:
  297. PyErr_SetString(PyExc_ValueError,
  298. "MatchOr is only valid in match_case patterns");
  299. return 0;
  300. /* This last case doesn't have any checking. */
  301. case Name_kind:
  302. return 1;
  303. }
  304. PyErr_SetString(PyExc_SystemError, "unexpected expression");
  305. return 0;
  306. }
  307. static int
  308. validate_pattern(expr_ty p)
  309. {
  310. // Coming soon (thanks Batuhan)!
  311. return 1;
  312. }
  313. static int
  314. _validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
  315. {
  316. if (asdl_seq_LEN(seq))
  317. return 1;
  318. PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
  319. return 0;
  320. }
  321. #define validate_nonempty_seq(seq, what, owner) _validate_nonempty_seq((asdl_seq*)seq, what, owner)
  322. static int
  323. validate_assignlist(asdl_expr_seq *targets, expr_context_ty ctx)
  324. {
  325. return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
  326. validate_exprs(targets, ctx, 0);
  327. }
  328. static int
  329. validate_body(asdl_stmt_seq *body, const char *owner)
  330. {
  331. return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
  332. }
  333. static int
  334. validate_stmt(stmt_ty stmt)
  335. {
  336. Py_ssize_t i;
  337. switch (stmt->kind) {
  338. case FunctionDef_kind:
  339. return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
  340. validate_arguments(stmt->v.FunctionDef.args) &&
  341. validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
  342. (!stmt->v.FunctionDef.returns ||
  343. validate_expr(stmt->v.FunctionDef.returns, Load));
  344. case ClassDef_kind:
  345. return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
  346. validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
  347. validate_keywords(stmt->v.ClassDef.keywords) &&
  348. validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
  349. case Return_kind:
  350. return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
  351. case Delete_kind:
  352. return validate_assignlist(stmt->v.Delete.targets, Del);
  353. case Assign_kind:
  354. return validate_assignlist(stmt->v.Assign.targets, Store) &&
  355. validate_expr(stmt->v.Assign.value, Load);
  356. case AugAssign_kind:
  357. return validate_expr(stmt->v.AugAssign.target, Store) &&
  358. validate_expr(stmt->v.AugAssign.value, Load);
  359. case AnnAssign_kind:
  360. if (stmt->v.AnnAssign.target->kind != Name_kind &&
  361. stmt->v.AnnAssign.simple) {
  362. PyErr_SetString(PyExc_TypeError,
  363. "AnnAssign with simple non-Name target");
  364. return 0;
  365. }
  366. return validate_expr(stmt->v.AnnAssign.target, Store) &&
  367. (!stmt->v.AnnAssign.value ||
  368. validate_expr(stmt->v.AnnAssign.value, Load)) &&
  369. validate_expr(stmt->v.AnnAssign.annotation, Load);
  370. case For_kind:
  371. return validate_expr(stmt->v.For.target, Store) &&
  372. validate_expr(stmt->v.For.iter, Load) &&
  373. validate_body(stmt->v.For.body, "For") &&
  374. validate_stmts(stmt->v.For.orelse);
  375. case AsyncFor_kind:
  376. return validate_expr(stmt->v.AsyncFor.target, Store) &&
  377. validate_expr(stmt->v.AsyncFor.iter, Load) &&
  378. validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
  379. validate_stmts(stmt->v.AsyncFor.orelse);
  380. case While_kind:
  381. return validate_expr(stmt->v.While.test, Load) &&
  382. validate_body(stmt->v.While.body, "While") &&
  383. validate_stmts(stmt->v.While.orelse);
  384. case If_kind:
  385. return validate_expr(stmt->v.If.test, Load) &&
  386. validate_body(stmt->v.If.body, "If") &&
  387. validate_stmts(stmt->v.If.orelse);
  388. case With_kind:
  389. if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
  390. return 0;
  391. for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
  392. withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
  393. if (!validate_expr(item->context_expr, Load) ||
  394. (item->optional_vars && !validate_expr(item->optional_vars, Store)))
  395. return 0;
  396. }
  397. return validate_body(stmt->v.With.body, "With");
  398. case AsyncWith_kind:
  399. if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
  400. return 0;
  401. for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
  402. withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
  403. if (!validate_expr(item->context_expr, Load) ||
  404. (item->optional_vars && !validate_expr(item->optional_vars, Store)))
  405. return 0;
  406. }
  407. return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
  408. case Match_kind:
  409. if (!validate_expr(stmt->v.Match.subject, Load)
  410. || !validate_nonempty_seq(stmt->v.Match.cases, "cases", "Match")) {
  411. return 0;
  412. }
  413. for (i = 0; i < asdl_seq_LEN(stmt->v.Match.cases); i++) {
  414. match_case_ty m = asdl_seq_GET(stmt->v.Match.cases, i);
  415. if (!validate_pattern(m->pattern)
  416. || (m->guard && !validate_expr(m->guard, Load))
  417. || !validate_body(m->body, "match_case")) {
  418. return 0;
  419. }
  420. }
  421. return 1;
  422. case Raise_kind:
  423. if (stmt->v.Raise.exc) {
  424. return validate_expr(stmt->v.Raise.exc, Load) &&
  425. (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
  426. }
  427. if (stmt->v.Raise.cause) {
  428. PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
  429. return 0;
  430. }
  431. return 1;
  432. case Try_kind:
  433. if (!validate_body(stmt->v.Try.body, "Try"))
  434. return 0;
  435. if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
  436. !asdl_seq_LEN(stmt->v.Try.finalbody)) {
  437. PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
  438. return 0;
  439. }
  440. if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
  441. asdl_seq_LEN(stmt->v.Try.orelse)) {
  442. PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
  443. return 0;
  444. }
  445. for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
  446. excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
  447. if ((handler->v.ExceptHandler.type &&
  448. !validate_expr(handler->v.ExceptHandler.type, Load)) ||
  449. !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
  450. return 0;
  451. }
  452. return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
  453. validate_stmts(stmt->v.Try.finalbody)) &&
  454. (!asdl_seq_LEN(stmt->v.Try.orelse) ||
  455. validate_stmts(stmt->v.Try.orelse));
  456. case Assert_kind:
  457. return validate_expr(stmt->v.Assert.test, Load) &&
  458. (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
  459. case Import_kind:
  460. return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
  461. case ImportFrom_kind:
  462. if (stmt->v.ImportFrom.level < 0) {
  463. PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
  464. return 0;
  465. }
  466. return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
  467. case Global_kind:
  468. return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
  469. case Nonlocal_kind:
  470. return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
  471. case Expr_kind:
  472. return validate_expr(stmt->v.Expr.value, Load);
  473. case AsyncFunctionDef_kind:
  474. return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
  475. validate_arguments(stmt->v.AsyncFunctionDef.args) &&
  476. validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
  477. (!stmt->v.AsyncFunctionDef.returns ||
  478. validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
  479. case Pass_kind:
  480. case Break_kind:
  481. case Continue_kind:
  482. return 1;
  483. default:
  484. PyErr_SetString(PyExc_SystemError, "unexpected statement");
  485. return 0;
  486. }
  487. }
  488. static int
  489. validate_stmts(asdl_stmt_seq *seq)
  490. {
  491. Py_ssize_t i;
  492. for (i = 0; i < asdl_seq_LEN(seq); i++) {
  493. stmt_ty stmt = asdl_seq_GET(seq, i);
  494. if (stmt) {
  495. if (!validate_stmt(stmt))
  496. return 0;
  497. }
  498. else {
  499. PyErr_SetString(PyExc_ValueError,
  500. "None disallowed in statement list");
  501. return 0;
  502. }
  503. }
  504. return 1;
  505. }
  506. static int
  507. validate_exprs(asdl_expr_seq *exprs, expr_context_ty ctx, int null_ok)
  508. {
  509. Py_ssize_t i;
  510. for (i = 0; i < asdl_seq_LEN(exprs); i++) {
  511. expr_ty expr = asdl_seq_GET(exprs, i);
  512. if (expr) {
  513. if (!validate_expr(expr, ctx))
  514. return 0;
  515. }
  516. else if (!null_ok) {
  517. PyErr_SetString(PyExc_ValueError,
  518. "None disallowed in expression list");
  519. return 0;
  520. }
  521. }
  522. return 1;
  523. }
  524. int
  525. _PyAST_Validate(mod_ty mod)
  526. {
  527. int res = 0;
  528. switch (mod->kind) {
  529. case Module_kind:
  530. res = validate_stmts(mod->v.Module.body);
  531. break;
  532. case Interactive_kind:
  533. res = validate_stmts(mod->v.Interactive.body);
  534. break;
  535. case Expression_kind:
  536. res = validate_expr(mod->v.Expression.body, Load);
  537. break;
  538. default:
  539. PyErr_SetString(PyExc_SystemError, "impossible module node");
  540. res = 0;
  541. break;
  542. }
  543. return res;
  544. }
  545. PyObject *
  546. _PyAST_GetDocString(asdl_stmt_seq *body)
  547. {
  548. if (!asdl_seq_LEN(body)) {
  549. return NULL;
  550. }
  551. stmt_ty st = asdl_seq_GET(body, 0);
  552. if (st->kind != Expr_kind) {
  553. return NULL;
  554. }
  555. expr_ty e = st->v.Expr.value;
  556. if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
  557. return e->v.Constant.value;
  558. }
  559. return NULL;
  560. }