From a0f4c287de7adb16650b588a78a28561178aea56 Mon Sep 17 00:00:00 2001 From: Axel Patrick Chepanski Gonzaga Date: Wed, 14 Jan 2026 11:04:52 -0300 Subject: [PATCH 1/5] Enhance image generation with error handling and logging; update GitHub Actions workflow for better commit management. --- .github/workflows/main.yml | 20 +++-- generate_images.py | 146 ++++++++++++++++++++++++++----------- github_stats.py | 52 ++++++------- main.py | 6 -- 4 files changed, 139 insertions(+), 85 deletions(-) delete mode 100644 main.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bb0735f..40d19a8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,7 +2,7 @@ name: Generate Stats Images on: push: - branches: [ master ] + branches: [ master, main ] schedule: - cron: "5 0 * * *" workflow_dispatch: @@ -39,18 +39,24 @@ jobs: python3 generate_images.py env: ACCESS_TOKEN: ${{ secrets.ACCESS_TOKEN }} + GITHUB_ACTOR: ${{ github.actor }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + EXCLUDE_FORKED_REPOS: false + # Optional secrets - Python code handles None/empty values gracefully EXCLUDED: ${{ secrets.EXCLUDED }} EXCLUDED_LANGS: ${{ secrets.EXCLUDED_LANGS }} - EXCLUDE_FORKED_REPOS: false GIT_EMAILS: ${{ secrets.GIT_EMAILS }} # Commit all changed files to the repository - name: Commit to the repo run: | - git config --global user.name "AxelPCG/gitgub-stats-modified" + git config --global user.name "AxelPCG/github-stats-modified" git config --global user.email "axelchepanski@hotmail.com" - git add . - # Force the build to succeed, even if no files were changed - git commit -m 'Update generated files' || true - git push + git add generated/ + # Only commit if there are changes + if git diff --staged --quiet; then + echo "No changes to commit" + else + git commit -m 'Update generated files [skip ci]' || exit 1 + git push || exit 1 + fi diff --git a/generate_images.py b/generate_images.py index c20867f..3776533 100644 --- a/generate_images.py +++ b/generate_images.py @@ -32,23 +32,40 @@ async def generate_overview(s: Stats) -> None: Generate an SVG badge with summary statistics :param s: Represents user's GitHub statistics """ - with open("templates/overview.svg", "r") as f: - output = f.read() - - output = re.sub("{{ name }}", await s.name, output) - output = re.sub("{{ stars }}", f"{await s.stargazers:,}", output) - output = re.sub("{{ forks }}", f"{await s.forks:,}", output) - output = re.sub("{{ contributions }}", f"{await s.total_contributions:,}", output) - output = re.sub("{{ views }}", f"{await s.views:,}", output) - output = re.sub("{{ repos }}", f"{len(await s.repos):,}", output) - commits = await s.total_commits - output = re.sub("{{ commits }}", f"{commits:,}", output) - output = re.sub("{{ prs }}", f"{await s.prs:,}", output) - output = re.sub("{{ issues }}", f"{await s.issues:,}", output) - - generate_output_folder() - with open("generated/overview.svg", "w") as f: - f.write(output) + try: + print("Starting generation of overview.svg...") + with open("templates/overview.svg", "r") as f: + output = f.read() + + print("Fetching statistics data...") + output = re.sub("{{ name }}", await s.name, output) + output = re.sub("{{ stars }}", f"{await s.stargazers:,}", output) + output = re.sub("{{ forks }}", f"{await s.forks:,}", output) + output = re.sub("{{ contributions }}", f"{await s.total_contributions:,}", output) + output = re.sub("{{ views }}", f"{await s.views:,}", output) + output = re.sub("{{ repos }}", f"{len(await s.repos):,}", output) + commits = await s.total_commits + output = re.sub("{{ commits }}", f"{commits:,}", output) + output = re.sub("{{ prs }}", f"{await s.prs:,}", output) + output = re.sub("{{ issues }}", f"{await s.issues:,}", output) + + generate_output_folder() + output_path = "generated/overview.svg" + with open(output_path, "w", encoding="utf-8") as f: + f.write(output) + + # Verify file was created and has content + if not os.path.exists(output_path): + raise FileNotFoundError(f"Failed to create {output_path}") + file_size = os.path.getsize(output_path) + if file_size == 0: + raise ValueError(f"Generated {output_path} is empty!") + print(f"Successfully generated overview.svg ({file_size} bytes)") + except Exception as e: + print(f"ERROR generating overview.svg: {e}") + import traceback + traceback.print_exc() + raise async def generate_languages(s: Stats) -> None: @@ -56,24 +73,35 @@ async def generate_languages(s: Stats) -> None: Generate an SVG badge with summary languages used :param s: Represents user's GitHub statistics """ - with open("templates/languages.svg", "r") as f: - output = f.read() - - progress = "" - lang_list = "" - sorted_languages = sorted( - (await s.languages).items(), reverse=True, key=lambda t: t[1].get("size") - ) - delay_between = 150 - for i, (lang, data) in enumerate(sorted_languages): - color = data.get("color") - color = color if color is not None else "#000000" - progress += ( - f'' - ) - lang_list += f""" + try: + print("Starting generation of languages.svg...") + with open("templates/languages.svg", "r") as f: + output = f.read() + + print("Fetching languages data...") + languages = await s.languages + print(f"Found {len(languages)} languages") + + if not languages: + print("WARNING: No languages found! Generating empty languages.svg") + progress = "" + lang_list = "" + else: + progress = "" + lang_list = "" + sorted_languages = sorted( + languages.items(), reverse=True, key=lambda t: t[1].get("size") + ) + delay_between = 150 + for i, (lang, data) in enumerate(sorted_languages): + color = data.get("color") + color = color if color is not None else "#000000" + progress += ( + f'' + ) + lang_list += f"""
  • None: """ - output = re.sub(r"{{ progress }}", progress, output) - output = re.sub(r"{{ lang_list }}", lang_list, output) - - generate_output_folder() - with open("generated/languages.svg", "w") as f: - f.write(output) + output = re.sub(r"{{ progress }}", progress, output) + output = re.sub(r"{{ lang_list }}", lang_list, output) + + generate_output_folder() + output_path = "generated/languages.svg" + with open(output_path, "w", encoding="utf-8") as f: + f.write(output) + + # Verify file was created and has content + if not os.path.exists(output_path): + raise FileNotFoundError(f"Failed to create {output_path}") + file_size = os.path.getsize(output_path) + if file_size == 0: + raise ValueError(f"Generated {output_path} is empty!") + print(f"Successfully generated languages.svg ({file_size} bytes)") + except Exception as e: + print(f"ERROR generating languages.svg: {e}") + import traceback + traceback.print_exc() + raise ################################################################################ @@ -137,7 +179,27 @@ async def main() -> None: ignore_forked_repos=ignore_forked_repos, emails=email_list, ) - await asyncio.gather(generate_languages(s), generate_overview(s)) + try: + # Generate both images in parallel, but catch errors individually + results = await asyncio.gather( + generate_languages(s), + generate_overview(s), + return_exceptions=True + ) + + # Check for exceptions + for i, result in enumerate(results): + if isinstance(result, Exception): + image_name = "languages.svg" if i == 0 else "overview.svg" + print(f"ERROR: Failed to generate {image_name}: {result}") + raise result + + print("All images generated successfully!") + except Exception as e: + print(f"FATAL ERROR during image generation: {e}") + import traceback + traceback.print_exc() + raise if __name__ == "__main__": diff --git a/github_stats.py b/github_stats.py index d41f9cf..e29fd92 100644 --- a/github_stats.py +++ b/github_stats.py @@ -144,33 +144,33 @@ def summary_query() -> str: """ :return: GraphQL query with summary of user stats """ - return f"""query {{ - viewer {{ + return """query { + viewer { login name - repositories(first: 100, ownerAffiliations: OWNER, isFork: false) {{ + repositories(first: 100, ownerAffiliations: OWNER, isFork: false) { totalCount - edges {{ - node {{ - stargazers {{ + edges { + node { + stargazers { totalCount - }} + } forkCount - }} - }} - }} - pullRequests(first: 1) {{ + } + } + } + pullRequests(first: 1) { totalCount - }} - issues(first: 1) {{ + } + issues(first: 1) { totalCount - }} - contributionsCollection {{ + } + contributionsCollection { totalCommitContributions restrictedContributionsCount - }} - }} -}} + } + } +} """ @staticmethod @@ -383,6 +383,10 @@ async def get_summary_stats(self) -> None: self._prs = viewer.get("pullRequests", {}).get("totalCount", 0) self._issues = viewer.get("issues", {}).get("totalCount", 0) contributions = viewer.get("contributionsCollection", {}) + self._total_contributions = ( + contributions.get("totalCommitContributions", 0) + + contributions.get("restrictedContributionsCount", 0) + ) async def get_stats(self) -> None: """ @@ -848,18 +852,6 @@ async def get_all_time_commits(self) -> None: self._total_commits = total_commits print(f"Total commits from all years: {total_commits}") - @property - async def total_forks(self) -> int: - """ - :return: total number of forks on user's repos + forks made by user - """ - forks_received = await self.forks - forks_made = await self.forks_made - - total = forks_received + forks_made - print(f"Total forks: {forks_received} received + {forks_made} made = {total}") - - return total ############################################################################### # Main Function diff --git a/main.py b/main.py deleted file mode 100644 index 2154291..0000000 --- a/main.py +++ /dev/null @@ -1,6 +0,0 @@ -def main(): - print("Hello from stats!") - - -if __name__ == "__main__": - main() From 678d884d2e84d28513894c0d6a9e65dc3450b4bd Mon Sep 17 00:00:00 2001 From: AxelPCG/github-stats-modified Date: Wed, 14 Jan 2026 14:06:50 +0000 Subject: [PATCH 2/5] Update generated files [skip ci] --- generated/overview.svg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/generated/overview.svg b/generated/overview.svg index 177148e..8452285 100644 --- a/generated/overview.svg +++ b/generated/overview.svg @@ -95,15 +95,15 @@
    Forks12
    -
    All-time contributions797
    +
    All-time contributions542
    -
    Commits1,062
    +
    Commits1,064
    -
    Repository views (past two weeks)47
    +
    Repository views (past two weeks)0
    -
    Repositories with contributions25
    +
    Repositories with contributions0
    -
    PRs87
    +
    PRs88
    Issues4
    From f1a90ef03fddc81db33bb94f9da99eced1b1cd8b Mon Sep 17 00:00:00 2001 From: Axel Patrick Chepanski Gonzaga Date: Wed, 14 Jan 2026 11:14:54 -0300 Subject: [PATCH 3/5] Refactor image generation to pre-fetch repository statistics and improve concurrency handling; implement locking mechanism in stats retrieval to prevent race conditions. --- generate_images.py | 19 ++-- github_stats.py | 230 ++++++++++++++++++++++++--------------------- 2 files changed, 129 insertions(+), 120 deletions(-) diff --git a/generate_images.py b/generate_images.py index 3776533..b3207ef 100644 --- a/generate_images.py +++ b/generate_images.py @@ -180,19 +180,14 @@ async def main() -> None: emails=email_list, ) try: - # Generate both images in parallel, but catch errors individually - results = await asyncio.gather( - generate_languages(s), - generate_overview(s), - return_exceptions=True - ) + # Pre-fetch stats to ensure data is loaded before generating images + print("Pre-fetching repository statistics...") + await s.get_stats() + print(f"Stats loaded: {len(await s.repos)} repos, {len(await s.languages)} languages") - # Check for exceptions - for i, result in enumerate(results): - if isinstance(result, Exception): - image_name = "languages.svg" if i == 0 else "overview.svg" - print(f"ERROR: Failed to generate {image_name}: {result}") - raise result + # Generate both images (stats already loaded, so parallel is safe now) + await generate_overview(s) + await generate_languages(s) print("All images generated successfully!") except Exception as e: diff --git a/github_stats.py b/github_stats.py index e29fd92..d5978ed 100644 --- a/github_stats.py +++ b/github_stats.py @@ -336,6 +336,10 @@ def __init__( self._repos: Optional[Set[str]] = None self._lines_changed: Optional[Tuple[int, int]] = None self._views: Optional[int] = None + + # Lock to prevent concurrent get_stats calls + self._stats_lock: Optional[asyncio.Lock] = None + self._stats_fetched: bool = False async def to_str(self) -> str: """ @@ -360,7 +364,9 @@ async def to_str(self) -> str: async def get_summary_stats(self) -> None: """ - Get lots of summary statistics using one big query. Sets many attributes + Get lots of summary statistics using one big query. Sets many attributes. + NOTE: This only sets _prs and _issues. Other stats come from get_stats() + or dedicated methods to avoid conflicts. """ raw_results = await self.queries.query(self.queries.summary_query()) if raw_results is None: @@ -369,122 +375,122 @@ async def get_summary_stats(self) -> None: if not viewer: return - self._name = viewer.get("name") or viewer.get("login", "No Name") - self._stargazers = sum( - [ - repo["node"]["stargazers"]["totalCount"] - for repo in viewer["repositories"]["edges"] - ] - ) - self._forks = sum( - [repo["node"]["forkCount"] for repo in viewer["repositories"]["edges"]] - ) - + if self._name is None: + self._name = viewer.get("name") or viewer.get("login", "No Name") + + # Only set PRs and Issues here - stars/forks come from get_stats() self._prs = viewer.get("pullRequests", {}).get("totalCount", 0) self._issues = viewer.get("issues", {}).get("totalCount", 0) - contributions = viewer.get("contributionsCollection", {}) - self._total_contributions = ( - contributions.get("totalCommitContributions", 0) + - contributions.get("restrictedContributionsCount", 0) - ) async def get_stats(self) -> None: """ - Get lots of summary statistics using one big query. Sets many attributes + Get lots of summary statistics using one big query. Sets many attributes. + Thread-safe: uses lock to prevent concurrent calls. """ - self._stargazers = 0 - self._forks = 0 - self._languages = dict() - self._repos = set() - - exclude_langs_lower = {x.lower() for x in self._exclude_langs} - print(f"Fetching stats for user: {self.username}") - print(f"Excluding repositories: {self._exclude_repos}") - print(f"Excluding languages: {self._exclude_langs}") - print(f"Ignore forked repos: {self._ignore_forked_repos}") - - next_owned = None - next_contrib = None - page_count = 0 - while True: - page_count += 1 - print(f"Fetching page {page_count}...") + # Initialize lock if needed + if self._stats_lock is None: + self._stats_lock = asyncio.Lock() - raw_results = await self.queries.query( - Queries.repos_overview( - owned_cursor=next_owned, contrib_cursor=next_contrib + async with self._stats_lock: + # Check if already fetched (another coroutine may have done it while we waited) + if self._stats_fetched: + return + + self._stargazers = 0 + self._forks = 0 + self._languages = dict() + self._repos = set() + + exclude_langs_lower = {x.lower() for x in self._exclude_langs} + print(f"Fetching stats for user: {self.username}") + print(f"Excluding repositories: {self._exclude_repos}") + print(f"Excluding languages: {self._exclude_langs}") + print(f"Ignore forked repos: {self._ignore_forked_repos}") + + next_owned = None + next_contrib = None + page_count = 0 + while True: + page_count += 1 + print(f"Fetching page {page_count}...") + + raw_results = await self.queries.query( + Queries.repos_overview( + owned_cursor=next_owned, contrib_cursor=next_contrib + ) ) - ) - raw_results = raw_results if raw_results is not None else {} + raw_results = raw_results if raw_results is not None else {} + + self._name = raw_results.get("data", {}).get("viewer", {}).get("name", None) + if self._name is None: + self._name = ( + raw_results.get("data", {}) + .get("viewer", {}) + .get("login", "No Name") + ) - self._name = raw_results.get("data", {}).get("viewer", {}).get("name", None) - if self._name is None: - self._name = ( + contrib_repos = ( raw_results.get("data", {}) .get("viewer", {}) - .get("login", "No Name") + .get("repositoriesContributedTo", {}) + ) + owned_repos = ( + raw_results.get("data", {}).get("viewer", {}).get("repositories", {}) ) - contrib_repos = ( - raw_results.get("data", {}) - .get("viewer", {}) - .get("repositoriesContributedTo", {}) - ) - owned_repos = ( - raw_results.get("data", {}).get("viewer", {}).get("repositories", {}) - ) - - repos = owned_repos.get("nodes", []) - if not self._ignore_forked_repos: - repos += contrib_repos.get("nodes", []) - - processed_repos = 0 - for repo in repos: - if repo is None: - continue - name = repo.get("nameWithOwner") - if name in self._repos or name in self._exclude_repos: - continue - self._repos.add(name) - processed_repos += 1 - - self._stargazers += repo.get("stargazers", {}).get("totalCount", 0) - self._forks += repo.get("forkCount", 0) + repos = owned_repos.get("nodes", []) + if not self._ignore_forked_repos: + repos += contrib_repos.get("nodes", []) - for lang in repo.get("languages", {}).get("edges", []): - lang_name = lang.get("node", {}).get("name", "Other") - if lang_name.lower() in exclude_langs_lower: + processed_repos = 0 + for repo in repos: + if repo is None: continue - if lang_name in self._languages: - self._languages[lang_name]["size"] += lang.get("size", 0) - self._languages[lang_name]["occurrences"] += 1 - else: - self._languages[lang_name] = { - "size": lang.get("size", 0), - "occurrences": 1, - "color": lang.get("node", {}).get("color"), - } - - print(f"Processed {processed_repos} repositories on page {page_count}") - - if owned_repos.get("pageInfo", {}).get( - "hasNextPage", False - ) or contrib_repos.get("pageInfo", {}).get("hasNextPage", False): - next_owned = owned_repos.get("pageInfo", {}).get( - "endCursor", next_owned - ) - next_contrib = contrib_repos.get("pageInfo", {}).get( - "endCursor", next_contrib - ) - else: - break + name = repo.get("nameWithOwner") + if name in self._repos or name in self._exclude_repos: + continue + self._repos.add(name) + processed_repos += 1 + + self._stargazers += repo.get("stargazers", {}).get("totalCount", 0) + self._forks += repo.get("forkCount", 0) + + for lang in repo.get("languages", {}).get("edges", []): + lang_name = lang.get("node", {}).get("name", "Other") + if lang_name.lower() in exclude_langs_lower: + continue + if lang_name in self._languages: + self._languages[lang_name]["size"] += lang.get("size", 0) + self._languages[lang_name]["occurrences"] += 1 + else: + self._languages[lang_name] = { + "size": lang.get("size", 0), + "occurrences": 1, + "color": lang.get("node", {}).get("color"), + } + + print(f"Processed {processed_repos} repositories on page {page_count}") + + if owned_repos.get("pageInfo", {}).get( + "hasNextPage", False + ) or contrib_repos.get("pageInfo", {}).get("hasNextPage", False): + next_owned = owned_repos.get("pageInfo", {}).get( + "endCursor", next_owned + ) + next_contrib = contrib_repos.get("pageInfo", {}).get( + "endCursor", next_contrib + ) + else: + break - print(f"Total repositories found: {len(self._repos)}") - print(f"Languages found: {len(self._languages)}") + print(f"Total repositories found: {len(self._repos)}") + print(f"Languages found: {len(self._languages)}") - langs_total = sum([v.get("size", 0) for v in self._languages.values()]) - for k, v in self._languages.items(): - v["prop"] = 100 * (v.get("size", 0) / langs_total) if langs_total > 0 else 0 + langs_total = sum([v.get("size", 0) for v in self._languages.values()]) + for k, v in self._languages.items(): + v["prop"] = 100 * (v.get("size", 0) / langs_total) if langs_total > 0 else 0 + + self._stats_fetched = True @property async def name(self) -> str: @@ -493,7 +499,7 @@ async def name(self) -> str: """ if self._name is not None: return self._name - await self.get_summary_stats() + await self.get_stats() assert self._name is not None return self._name @@ -504,7 +510,7 @@ async def stargazers(self) -> int: """ if self._stargazers is not None: return self._stargazers - await self.get_summary_stats() + await self.get_stats() assert self._stargazers is not None return self._stargazers @@ -513,9 +519,9 @@ async def forks(self) -> int: """ :return: total number of forks on user's repos + forks made by user """ - # Primeiro, obter forks recebidos + # Primeiro, obter forks recebidos via get_stats if self._forks is None: - await self.get_summary_stats() + await self.get_stats() assert self._forks is not None forks_received = self._forks @@ -564,11 +570,12 @@ async def repos(self) -> Set[str]: @property async def total_contributions(self) -> int: """ - :return: count of user's total contributions as defined by GitHub + :return: count of user's total contributions as defined by GitHub (all years) """ if self._total_contributions is not None: return self._total_contributions + print("Fetching total contributions from all years...") self._total_contributions = 0 years = ( (await self.queries.query(Queries.contrib_years())) @@ -577,6 +584,12 @@ async def total_contributions(self) -> int: .get("contributionsCollection", {}) .get("contributionYears", []) ) + print(f"Found contribution years: {years}") + + if not years: + print("WARNING: No contribution years found!") + return 0 + by_year = ( (await self.queries.query(Queries.all_contribs(years))) .get("data", {}) @@ -584,9 +597,10 @@ async def total_contributions(self) -> int: .values() ) for year in by_year: - self._total_contributions += year.get("contributionCalendar", {}).get( - "totalContributions", 0 - ) + contrib = year.get("contributionCalendar", {}).get("totalContributions", 0) + self._total_contributions += contrib + + print(f"Total contributions (all years): {self._total_contributions}") return cast(int, self._total_contributions) @property From 12d87d6b36d96598a601f710707b129e0a0d13d6 Mon Sep 17 00:00:00 2001 From: AxelPCG/github-stats-modified Date: Wed, 14 Jan 2026 16:46:37 +0000 Subject: [PATCH 4/5] Update generated files [skip ci] --- generated/overview.svg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/generated/overview.svg b/generated/overview.svg index 8452285..dccfd0c 100644 --- a/generated/overview.svg +++ b/generated/overview.svg @@ -91,17 +91,17 @@ -
    Stars13
    +
    Stars17
    -
    Forks12
    +
    Forks16
    -
    All-time contributions542
    +
    All-time contributions804
    Commits1,064
    -
    Repository views (past two weeks)0
    +
    Repository views (past two weeks)48
    -
    Repositories with contributions0
    +
    Repositories with contributions26
    PRs88
    From f8ecefbafdcf5a2577fb548b77c09cd061e5882d Mon Sep 17 00:00:00 2001 From: Axel Patrick Chepanski Gonzaga Date: Wed, 14 Jan 2026 13:55:06 -0300 Subject: [PATCH 5/5] Refactor print statements for improved readability and consistency; remove unnecessary whitespace in github_stats.py. --- github_stats.py | 171 ++++++++++++++++++++++++++++++------------------ 1 file changed, 107 insertions(+), 64 deletions(-) diff --git a/github_stats.py b/github_stats.py index d5978ed..b5f1ccd 100644 --- a/github_stats.py +++ b/github_stats.py @@ -77,11 +77,11 @@ async def query_rest(self, path: str, params: Optional[Dict] = None) -> Dict: headers = { "Authorization": f"token {self.access_token}", } - + # API de busca de commits requer header especial if "/search/commits" in path: headers["Accept"] = "application/vnd.github.cloak-preview+json" - + if params is None: params = dict() if path.startswith("/"): @@ -94,11 +94,15 @@ async def query_rest(self, path: str, params: Optional[Dict] = None) -> Dict: params=tuple(params.items()), ) if r_async.status == 202: - print(f"Request to {path} returned 202 (processing). Retrying in 2s... (attempt {attempt + 1}/60)") + print( + f"Request to {path} returned 202 (processing). Retrying in 2s... (attempt {attempt + 1}/60)" + ) await asyncio.sleep(2) continue elif r_async.status == 403: - print(f"Request to {path} returned 403 (rate limit). Retrying in 5s... (attempt {attempt + 1}/60)") + print( + f"Request to {path} returned 403 (rate limit). Retrying in 5s... (attempt {attempt + 1}/60)" + ) await asyncio.sleep(5) continue elif r_async.status == 404: @@ -119,15 +123,21 @@ async def query_rest(self, path: str, params: Optional[Dict] = None) -> Dict: params=tuple(params.items()), ) if r_requests.status_code == 202: - print(f"Fallback request to {path} returned 202. Retrying in 2s... (attempt {attempt + 1}/60)") + print( + f"Fallback request to {path} returned 202. Retrying in 2s... (attempt {attempt + 1}/60)" + ) await asyncio.sleep(2) continue elif r_requests.status_code == 403: - print(f"Fallback request to {path} returned 403. Retrying in 5s... (attempt {attempt + 1}/60)") + print( + f"Fallback request to {path} returned 403. Retrying in 5s... (attempt {attempt + 1}/60)" + ) await asyncio.sleep(5) continue elif r_requests.status_code == 404: - print(f"Fallback request to {path} returned 404. Skipping...") + print( + f"Fallback request to {path} returned 404. Skipping..." + ) return dict() elif r_requests.status_code == 200: result_json = r_requests.json() @@ -135,7 +145,7 @@ async def query_rest(self, path: str, params: Optional[Dict] = None) -> Dict: return result_json except Exception as e2: print(f"Both aiohttp and requests failed for {path}: {e2}") - + print(f"Too many retries for {path}. Data will be incomplete.") return dict() @@ -191,7 +201,7 @@ def repos_overview( direction: DESC }}, isFork: false, - after: {"null" if owned_cursor is None else '"'+ owned_cursor +'"'} + after: {"null" if owned_cursor is None else '"' + owned_cursor + '"'} ) {{ pageInfo {{ hasNextPage @@ -227,7 +237,7 @@ def repos_overview( REPOSITORY, PULL_REQUEST_REVIEW ] - after: {"null" if contrib_cursor is None else '"'+ contrib_cursor +'"'} + after: {"null" if contrib_cursor is None else '"' + contrib_cursor + '"'} ) {{ pageInfo {{ hasNextPage @@ -336,7 +346,7 @@ def __init__( self._repos: Optional[Set[str]] = None self._lines_changed: Optional[Tuple[int, int]] = None self._views: Optional[int] = None - + # Lock to prevent concurrent get_stats calls self._stats_lock: Optional[asyncio.Lock] = None self._stats_fetched: bool = False @@ -377,7 +387,7 @@ async def get_summary_stats(self) -> None: if self._name is None: self._name = viewer.get("name") or viewer.get("login", "No Name") - + # Only set PRs and Issues here - stars/forks come from get_stats() self._prs = viewer.get("pullRequests", {}).get("totalCount", 0) self._issues = viewer.get("issues", {}).get("totalCount", 0) @@ -390,12 +400,12 @@ async def get_stats(self) -> None: # Initialize lock if needed if self._stats_lock is None: self._stats_lock = asyncio.Lock() - + async with self._stats_lock: # Check if already fetched (another coroutine may have done it while we waited) if self._stats_fetched: return - + self._stargazers = 0 self._forks = 0 self._languages = dict() @@ -413,7 +423,7 @@ async def get_stats(self) -> None: while True: page_count += 1 print(f"Fetching page {page_count}...") - + raw_results = await self.queries.query( Queries.repos_overview( owned_cursor=next_owned, contrib_cursor=next_contrib @@ -421,7 +431,9 @@ async def get_stats(self) -> None: ) raw_results = raw_results if raw_results is not None else {} - self._name = raw_results.get("data", {}).get("viewer", {}).get("name", None) + self._name = ( + raw_results.get("data", {}).get("viewer", {}).get("name", None) + ) if self._name is None: self._name = ( raw_results.get("data", {}) @@ -435,7 +447,9 @@ async def get_stats(self) -> None: .get("repositoriesContributedTo", {}) ) owned_repos = ( - raw_results.get("data", {}).get("viewer", {}).get("repositories", {}) + raw_results.get("data", {}) + .get("viewer", {}) + .get("repositories", {}) ) repos = owned_repos.get("nodes", []) @@ -451,20 +465,28 @@ async def get_stats(self) -> None: continue self._repos.add(name) processed_repos += 1 - + self._stargazers += repo.get("stargazers", {}).get("totalCount", 0) self._forks += repo.get("forkCount", 0) - for lang in repo.get("languages", {}).get("edges", []): + repo_langs = repo.get("languages", {}).get("edges", []) + if repo_langs: + lang_names = [ + entry.get("node", {}).get("name", "?") for entry in repo_langs + ] + print(f" Repo {name}: {lang_names}") + + for lang in repo_langs: lang_name = lang.get("node", {}).get("name", "Other") + lang_size = lang.get("size", 0) if lang_name.lower() in exclude_langs_lower: continue if lang_name in self._languages: - self._languages[lang_name]["size"] += lang.get("size", 0) + self._languages[lang_name]["size"] += lang_size self._languages[lang_name]["occurrences"] += 1 else: self._languages[lang_name] = { - "size": lang.get("size", 0), + "size": lang_size, "occurrences": 1, "color": lang.get("node", {}).get("color"), } @@ -488,8 +510,20 @@ async def get_stats(self) -> None: langs_total = sum([v.get("size", 0) for v in self._languages.values()]) for k, v in self._languages.items(): - v["prop"] = 100 * (v.get("size", 0) / langs_total) if langs_total > 0 else 0 - + v["prop"] = ( + 100 * (v.get("size", 0) / langs_total) if langs_total > 0 else 0 + ) + + # Debug: show language breakdown + print("Language breakdown (by size):") + sorted_langs = sorted( + self._languages.items(), key=lambda x: x[1].get("size", 0), reverse=True + ) + for lang_name, lang_data in sorted_langs[:15]: # Top 15 + print( + f" {lang_name}: {lang_data.get('size', 0):,} bytes ({lang_data.get('prop', 0):.2f}%)" + ) + self._stats_fetched = True @property @@ -524,14 +558,14 @@ async def forks(self) -> int: await self.get_stats() assert self._forks is not None forks_received = self._forks - + # Depois, obter forks feitos forks_made = await self.forks_made - + # Retornar a soma total total = forks_received + forks_made print(f"Total forks: {forks_received} received + {forks_made} made = {total}") - + return total @property @@ -585,11 +619,11 @@ async def total_contributions(self) -> int: .get("contributionYears", []) ) print(f"Found contribution years: {years}") - + if not years: print("WARNING: No contribution years found!") return 0 - + by_year = ( (await self.queries.query(Queries.all_contribs(years))) .get("data", {}) @@ -599,7 +633,7 @@ async def total_contributions(self) -> int: for year in by_year: contrib = year.get("contributionCalendar", {}).get("totalContributions", 0) self._total_contributions += contrib - + print(f"Total contributions (all years): {self._total_contributions}") return cast(int, self._total_contributions) @@ -614,16 +648,16 @@ async def lines_changed(self) -> Tuple[int, int]: deletions = 0 repos = await self.repos print(f"Calculating lines changed for {len(repos)} repositories...") - + for i, repo in enumerate(repos): try: - print(f"Processing repository {i+1}/{len(repos)}: {repo}") + print(f"Processing repository {i + 1}/{len(repos)}: {repo}") r = await self.queries.query_rest(f"/repos/{repo}/stats/contributors") - + if not r or not isinstance(r, list): print(f"Invalid response for {repo}: {type(r)}") continue - + for author_obj in r: # Handle malformed response from the API by skipping this repo if not isinstance(author_obj, dict) or not isinstance( @@ -637,12 +671,12 @@ async def lines_changed(self) -> Tuple[int, int]: weeks = author_obj.get("weeks", []) if not isinstance(weeks, list): continue - + for week in weeks: if isinstance(week, dict): additions += week.get("a", 0) deletions += week.get("d", 0) - + except Exception as e: print(f"Error processing {repo}: {e}") continue @@ -663,24 +697,24 @@ async def views(self) -> int: total = 0 repos = await self.repos print(f"Calculating views for {len(repos)} repositories...") - + for i, repo in enumerate(repos): try: - print(f"Processing views for repository {i+1}/{len(repos)}: {repo}") + print(f"Processing views for repository {i + 1}/{len(repos)}: {repo}") r = await self.queries.query_rest(f"/repos/{repo}/traffic/views") - + if not r or not isinstance(r, dict): print(f"Invalid response for {repo}: {type(r)}") continue - + views_data = r.get("views", []) if not isinstance(views_data, list): continue - + for view in views_data: if isinstance(view, dict): total += view.get("count", 0) - + except Exception as e: print(f"Error processing views for {repo}: {e}") continue @@ -707,8 +741,10 @@ async def total_commits(self) -> int: }} ''' response = await self.queries.query(query) - if 'data' in response and 'user' in response['data']: - total_commit = response['data']['user']['contributionsCollection']['totalCommitContributions'] + if "data" in response and "user" in response["data"]: + total_commit = response["data"]["user"]["contributionsCollection"][ + "totalCommitContributions" + ] total_commits += total_commit else: print(f"Erro ao buscar commits para email {email}: {response}") @@ -723,10 +759,14 @@ async def total_commits(self) -> int: }} ''' response = await self.queries.query(query) - if 'data' in response and 'user' in response['data']: - total_commits = response['data']['user']['contributionsCollection']['totalCommitContributions'] + if "data" in response and "user" in response["data"]: + total_commits = response["data"]["user"]["contributionsCollection"][ + "totalCommitContributions" + ] else: - print(f"Erro ao buscar commits para username {self.username}: {response}") + print( + f"Erro ao buscar commits para username {self.username}: {response}" + ) return total_commits @property @@ -757,12 +797,12 @@ async def get_user_forks(self) -> None: """ if self._forks_made is not None: return - + print("Fetching forks made by user...") - + total_forks = 0 cursor = None - + while True: query = f""" query {{ @@ -784,29 +824,29 @@ async def get_user_forks(self) -> None: }} """ raw_results = await self.queries.query(query) - + if raw_results is None: self._forks_made = 0 return - + viewer = raw_results.get("data", {}).get("viewer", {}) if not viewer: self._forks_made = 0 return - + repositories = viewer.get("repositories", {}) - + # Na primeira página, pegamos o totalCount if cursor is None: total_forks = repositories.get("totalCount", 0) - + # Verificar se há mais páginas page_info = repositories.get("pageInfo", {}) if page_info.get("hasNextPage"): cursor = page_info.get("endCursor") else: break - + self._forks_made = total_forks print(f"Found {self._forks_made} forks made by user") @@ -826,7 +866,7 @@ async def get_all_time_commits(self) -> None: Get total commits from all years via GraphQL """ print("Fetching total commits from all years...") - + # Buscar todos os anos de contribuição years = ( (await self.queries.query(Queries.contrib_years())) @@ -835,10 +875,10 @@ async def get_all_time_commits(self) -> None: .get("contributionsCollection", {}) .get("contributionYears", []) ) - + print(f"Found contribution years: {years}") total_commits = 0 - + # Para cada ano, buscar o total de commits for year in years: query = f""" @@ -853,16 +893,19 @@ async def get_all_time_commits(self) -> None: """ result = await self.queries.query(query) if result and "data" in result: - contrib = result.get("data", {}).get("viewer", {}).get("contributionsCollection", {}) - year_commits = ( - contrib.get("totalCommitContributions", 0) + - contrib.get("restrictedContributionsCount", 0) + contrib = ( + result.get("data", {}) + .get("viewer", {}) + .get("contributionsCollection", {}) + ) + year_commits = contrib.get("totalCommitContributions", 0) + contrib.get( + "restrictedContributionsCount", 0 ) total_commits += year_commits print(f" Year {year}: {year_commits} commits") else: print(f" Year {year}: Failed to fetch data") - + self._total_commits = total_commits print(f"Total commits from all years: {total_commits}") @@ -888,4 +931,4 @@ async def main() -> None: if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main())