Skip to content

Commit 75842cb

Browse files
committed
fixed yml by also upgrading virtualenv to allow the correct, newer version of it to be used
1 parent f234c69 commit 75842cb

File tree

2 files changed

+52
-35
lines changed

2 files changed

+52
-35
lines changed

.github/workflows/deploy-docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
sudo apt install -y texlive-latex-extra graphviz
2828
- name: Install Python Dependencies
2929
run: |
30-
python -m pip install --upgrade pip
30+
python -m pip install --upgrade pip setuptools wheel virtualenv
3131
pip install poetry
3232
poetry config virtualenvs.create false
3333
poetry install

wizard/sandbox_mathpix_2025.ipynb

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -279,16 +279,20 @@
279279
"outputs": [],
280280
"source": [
281281
"# Define the schema for the tutorial output.\n",
282-
"class Exercise(BaseModel):\n",
282+
"class Set_Question(BaseModel):\n",
283283
" title: str = Field(..., description=\"Title of the exercise (only the text, no numbering)\")\n",
284284
" content: str = Field(..., description=\"Content of the exercise (no exercise title, no subquestions)\")\n",
285285
" subquestions: list[str] = Field(..., description=\"List of subquestions within the exercise (only the text, no numbering)\")\n",
286-
" worked_solution_answers: list[str] = Field(..., description=\"List of answers to the exercise (only the text, no numbering)\")\n",
287286
" \n",
288-
"class Tutorial(BaseModel):\n",
289-
" name: str = Field(..., description=\"Title of the tutorial\")\n",
290-
" year: str = Field(..., description=\"Year of the tutorial\")\n",
291-
" exercises: list[Exercise] = Field(..., description=\"List of tutorial questions\")\n",
287+
"class Set_Answer(BaseModel):\n",
288+
" title: str = Field(..., description=\"Title of the exercise (only the text, no numbering)\")\n",
289+
" workedSolutions: list[str] = Field(..., description=\"List of worked solution to subquestions within the exercise (no numbering or counting)\")\n",
290+
"\n",
291+
"class Set(BaseModel):\n",
292+
" name: str = Field(..., description=\"Title of the set\")\n",
293+
" year: str = Field(..., description=\"Year of the set\")\n",
294+
" exercise: list[Set_Question] = Field(..., description=\"List of exercises in the set\")\n",
295+
" workedSolution: list[Set_Answer] = Field(..., description=\"List of worked solutions for the exercises in the set\")\n",
292296
"\n",
293297
"def extract_tutorial_questions(doc_page_content: str) -> dict:\n",
294298
" \"\"\"\n",
@@ -302,43 +306,49 @@
302306
" \"name\": \"<title of tutorial>\",\n",
303307
" \"year\": \"<year of tutorial>\",\n",
304308
" \"exercise\": [\n",
305-
" { title: \"exercise text 1\", content: \"content text exercise 1\", subquestions: [\"subquestion text 1\", \"subquestion text 2\", ...], worked_solution_answers: [\"worked solution answer 1\", \"worked solution answer 2\", ...] },\n",
306-
" { title: \"exercise text 2\", content: \"content text exercise 2\", subquestions: [\"subquestion text 1\", \"subquestion text 2\", ...], worked_solution_answers: [\"worked solution answer 1\", \"worked solution answer 2\", ...] },\n",
309+
" { title: \"exercise text 1\", content: \"content text exercise 1\", subquestions: [\"subquestion text 1\", \"subquestion text 2\", ...],\n",
310+
" { title: \"exercise text 2\", content: \"content text exercise 2\", subquestions: [\"subquestion text 1\", \"subquestion text 2\", ...],\n",
311+
" ...\n",
312+
" ]\n",
313+
" \"workedSolution\": [\n",
314+
" { title: \"exercise text 1\", workedSolutions: [\"solution text 1\", \"solution text 2\", ...] },\n",
315+
" { title: \"exercise text 2\", workedSolutions: [\"solution text 1\", \"solution text 2\", ...] },\n",
307316
" ...\n",
308317
" ]\n",
309318
" }\n",
310319
" \n",
311-
" The tutorial sheet (IMPORTED_TUTORIAL) may contain reference solutions; do not alter\n",
312320
" the original text of the exercises. The function returns a dictionary parsed from the JSON output.\n",
313-
" the questions, parts and their respective answers/worked solutions may not be in the same area, and may not even exist, in which case the function will return empty strings for those fields.\n",
321+
" if any of the text mentions a figure/diagram, then also find the figure and add it to the content of the exercise.\n",
314322
" \n",
315323
" Args:\n",
316-
" doc_page_content (str): The content of the tutorial sheet.\n",
324+
" doc_page_content (str): The content of a set.\n",
317325
" \n",
318326
" Returns:\n",
319327
" dict: A dictionary containing the keys \"name\" and \"exercise\".\n",
320328
" If parsing fails, returns None.\n",
321329
" \"\"\"\n",
322330
" # Initialize the output parser with the Tutorial schema.\n",
323-
" parser = PydanticOutputParser(pydantic_object=Tutorial)\n",
331+
" parser = PydanticOutputParser(pydantic_object=Set)\n",
324332
"\n",
325333
" # Construct the prompt, appending the parser's format instructions.\n",
326334
" prompt = f\"\"\"\n",
327-
" IMPORTED_TUTORIAL\n",
335+
" IMPORTED_SET\n",
328336
" ```markdown\n",
329337
" {doc_page_content}\n",
330338
" ```\n",
331339
"\n",
332-
" IMPORTED_TUTORIAL is a tutorial sheet with several exercises. It may or may\n",
333-
" not include reference solutions. Please infer the title of the tutorial from\n",
334-
" the content, if no suitable name found, just leave as Tutorial 0.0, and extract each individual question as a separate string. Do\n",
335-
" not modify the text of the exercises. Only use $...$ for math expressions.\n",
340+
" IMPORTED_SET is a set of questions. It may or may not include reference solutions.\n",
341+
" Infer the title of the set from the content, if no suitable name found, just leave as Unnamed Set, and extract each individual question as a separate string.\n",
342+
" Do not modify the text of the exercises. \n",
343+
" Only use $...$ for math expressions.\n",
336344
"\n",
337-
" If the exercise mentions figures, then find all the captions of figures (no links). \n",
338-
" Keep the captions as \"Figure Q1 - ...\".\n",
345+
" If the exercise mentions figures/diagrams, then find the diagram (the local path) that it is talking about,\n",
346+
" and include it in the content of the exercise.\n",
339347
"\n",
340348
" If the exercise mentions tables, then include the table in the content.\n",
341349
"\n",
350+
" Ensure that there is a workedSolution for each exercise, which should have the same title and a list of solutions that matches the subquestions.\n",
351+
"\n",
342352
" Return a valid JSON string with the following structure:\n",
343353
" {parser.get_format_instructions()}\n",
344354
" \"\"\"\n",
@@ -384,18 +394,18 @@
384394
"print(f\"Title: {title}\\n\")\n",
385395
"\n",
386396
"# Extract questions\n",
387-
"questions = imported_tutorial[\"exercises\"]\n",
397+
"questions = imported_tutorial[\"exercise\"]\n",
398+
"solutions = imported_tutorial[\"workedSolution\"]\n",
388399
"\n",
389400
"# Loop over and print each question\n",
390-
"for idx, question in enumerate(questions, start=1):\n",
391-
" print(f\"**Question {idx}**:\\n{question.get(\"title\")}\\n\")\n",
401+
"for idx1, (question, solution) in enumerate(zip(questions, solutions), start=1):\n",
402+
" print(f\"**Question {idx1}**:\\n{question.get(\"title\")}\\n\")\n",
392403
" print(f\"Content: {question.get(\"content\")}\\n\")\n",
393-
" print(\"Subquestions:\")\n",
394-
" for subquestion in question.get(\"subquestions\", []):\n",
395-
" print(f\"- {subquestion}\")\n",
396-
" print(\"Worked Solution Answers:\")\n",
397-
" for answer in question.get(\"worked_solution_answers\", []):\n",
398-
" print(f\"- {answer}\")\n",
404+
" for idx2, (subquestion, subanswer) in enumerate(zip(question.get(\"subquestions\", []), solution.get(\"workedSolutions\", [])), start=1):\n",
405+
" print(f\"Question {idx1}:\")\n",
406+
" print(f\"- Subquestion {idx2}: {subquestion}\")\n",
407+
" print(f\"- Worked Solution {idx2}: {subanswer}\")\n",
408+
" print(\"\\n\")\n",
399409
" print(\"-\" * 40) # Separator for readability"
400410
]
401411
},
@@ -438,7 +448,7 @@
438448
" publish: bool = Field(..., description=\"Publish flag\")\n",
439449
" title: str = Field(..., description=\"Question title\")\n",
440450
"\n",
441-
"def create_question_json(question: str) -> dict:\n",
451+
"def create_question_json(question: str, solution: str) -> dict:\n",
442452
" # Initialize the output parser using the defined Pydantic model.\n",
443453
" parser = PydanticOutputParser(pydantic_object=QuestionJson)\n",
444454
"\n",
@@ -480,6 +490,11 @@
480490
" {question}\n",
481491
" ```\n",
482492
"\n",
493+
" IMPORTED_SOLUTION\n",
494+
" ```markdown\n",
495+
" {solution}\n",
496+
" ```\n",
497+
"\n",
483498
" Preserve the markdown math formatting to use $...$ for math expressions. Do not modify the original text of the question.\n",
484499
"\n",
485500
" Infer the final answer and put it in the answerContent field of the part. \n",
@@ -511,26 +526,28 @@
511526
"metadata": {},
512527
"outputs": [],
513528
"source": [
514-
"questions = imported_tutorial[\"exercises\"]\n",
529+
"questions = imported_tutorial[\"exercise\"]\n",
530+
"solutions = imported_tutorial[\"workedSolution\"]\n",
531+
"\n",
515532
"\n",
516533
"# Loop over all questions and question_answers and print each question\n",
517-
"for idx, question in zip(range(1, len(questions)+1), questions):\n",
534+
"for idx, (question, solution) in enumerate(zip(questions, solutions), start=1):\n",
518535
" print(f\"**Question {idx}**:\\n{question}\\n\")\n",
519536
" # print(f\"**Question Answers {idx}**:\\n{question_ans}\\n\")\n",
520537
"\n",
521538
" print(\"INFO: Mapping question in markdown into JSON\")\n",
522-
" question_json = create_question_json(question)\n",
539+
" question_json = create_question_json(question,solution)\n",
523540
" question_json[\"orderNumber\"] = idx-1\n",
524541
" print(f\"INFO: JSON {idx}:\\n{question_json}\\n\")\n",
525542
" \n",
526-
" print(\"INFO: Get figures\")\n",
543+
" # print(\"INFO: Get figures\")\n",
527544
" # updated_question_json = add_figure_references_to_questions(figures, question_json)\n",
528545
" # updated_question_json = add_local_figures_to_questions(figures, question_json)\n",
529546
" updated_question_json = question_json\n",
530547
"\n",
531548
" question_name = updated_question_json[\"title\"].replace(\" \", \"_\")\n",
532549
" question_index = f\"{(idx-1):03}\" \n",
533-
" filename = f\"{set_path}question_{question_index}_{question_name}.json\"\n",
550+
" filename = f\"{output_path}/question_{question_index}_{question_name}.json\"\n",
534551
" print(f\"INFO: writing {filename}\")\n",
535552
" open(filename, \"w\").write(json.dumps(updated_question_json, indent=2))\n",
536553
" \n",

0 commit comments

Comments
 (0)