Diffstat (limited to 'webstats/workforrobots.org')
-rw-r--r--  webstats/workforrobots.org  526
1 files changed, 0 insertions, 526 deletions
diff --git a/webstats/workforrobots.org b/webstats/workforrobots.org
deleted file mode 100644
index 08f124b..0000000
--- a/webstats/workforrobots.org
+++ /dev/null
@@ -1,526 +0,0 @@
-#+title: Work for robots
-#+PROPERTY: header-args:sqlite :db /scratch/titan/apache2/cgit-logs.sqlite :colnames yes
-#+PROPERTY: header-args :exports both :cache yes :eval no-export
-#+HTML_HEAD: <script src="http://127.0.0.1:8095/skewer"></script>
-#+HTML_HEAD: <link rel="stylesheet" href="static/uPlot.min.css" />
-#+HTML_HEAD_EXTRA: <script src="static/uPlot.iife.min.js"></script>
-
-I self-host some of my git repositories to keep sovereignty and independence
-from large Internet corporations. Public-facing repositories are for everybody,
-and today that means for robots. Robots are the main consumers of my work. With
-the =AI-hype=, I wanted to have a look at what those AI companies are collecting
-from my work. It is worse than everything: it is idiotically everything. They
-can't recognize that they are parsing git repositories and use the appropriate
-way of downloading them.
-
-#+begin_src sqlite :exports none
-SELECT
- min(date),
- min(datetime (date, 'unixepoch')),
- min(datetime (date, 'unixepoch', 'localtime')),
- max(date),
- max(datetime (date, 'unixepoch')),
- max(datetime (date, 'unixepoch', 'localtime'))
-FROM
- logs
-#+end_src
-
-#+RESULTS[8f773b86167f2d36db335568f344063f13838a11]:
-| min(date) | min(datetime (date, 'unixepoch')) | min(datetime (date, 'unixepoch', 'localtime')) | max(date) | max(datetime (date, 'unixepoch')) | max(datetime (date, 'unixepoch', 'localtime')) |
-|------------+-----------------------------------+------------------------------------------------+------------+-----------------------------------+------------------------------------------------|
-| 1735686035 | 2024-12-31 23:00:35 | 2025-01-01 00:00:35 | 1745109504 | 2025-04-20 00:38:24 | 2025-04-20 02:38:24 |
-* Who is visiting
-I analyzed the =Apache= log files of my =cgit= service in the period from
-=2025-01-01= till =2025-04-20=. Table [[top-users]] shows the top /users/ of my
-public-facing git repository. The leading AI companies =OpenAI= and =Anthropic=,
-with their respective bots =GPTBot= and =ClaudeBot=, simply dominate the load on
-the service. I found it unbelievable that they could each extract roughly
-=7GiB= of data. That is a lot of bandwidth out of my server for a few git
-repositories served through a lightweight web interface.
-
-#+begin_src sqlite :exports results
---SELECT
- --count(*) AS Requests,
- --round(total (length) / 1024 / 1024, 1) "Tx MiB",
- ----0,
- --'Everybody else' AS "User Agent"
---FROM
- --logs
---WHERE
- --agentid NOT IN (143, 1, 19, 6, 4602, 3, 2, 4, 10306, 9)
- --AND path NOT LIKE '/ingrid/%'
---UNION
-SELECT
- count(*) AS Requests,
- round(total (length) / 1024 / 1024, 1) "Tx MiB",
- --agentid,
- user_agent AS "User Agent"
-FROM
- logs
- JOIN agent ON agent.id = logs.agentid
-WHERE
- path NOT LIKE '/ingrid/%'
-GROUP BY
- agentid
-ORDER BY
- 2 DESC
-LIMIT 10
-#+end_src
-
-#+name: top-users
-#+caption: Top 10 /users/ ranked by bandwidth usage (/Tx/). The /User Agent/ column shows how each client identifies itself.
-#+RESULTS[36d7b647efa39c3af86581279748a2bb53d034f3]:
-| Requests | Tx MiB | User Agent |
-|----------+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| 3572480 | 8819.6 | Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; *GPTBot* /1.2; +https://openai.com/gptbot) |
-| 1617262 | 6766.3 | Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; *ClaudeBot* /1.0; +claudebot@anthropic.com) |
-| 273968 | 721.4 | Mozilla/5.0 (compatible; *Barkrowler* /0.9; +https://babbar.tech/crawler) |
-| 80159 | 498.3 | Mozilla/5.0 (*Macintosh*; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 |
-| 207771 | 475.8 | *Scrapy* /2.11.2 (+https://scrapy.org) |
-| 69697 | 466.1 | Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; *PetalBot*;+https://webmaster.petalsearch.com/site/petalbot) |
-| 59832 | 416.4 | Mozilla/5.0 (compatible; *AhrefsBot* /7.0; +http://ahrefs.com/robot/) |
-| 14142 | 83.3 | Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; *Bytespider*; spider-feedback@bytedance.com) |
-| 2500 | 53.7 | Mozilla/5.0 (compatible; *SeekportBot*; +https://bot.seekport.com) |
-| 3578 | 30.9 | Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.52 Mobile Safari/537.36 (compatible; *Google* Other) |
-
-What does it look like as a function of time? Figure [[fig:agent-traffic]] shows the
-load on the CGit frontend service by each visiting agent over time. Hover over
-the plot to read the exact value for each agent at a given time in the legend.
-You can highlight a specific curve by hovering over it or its legend entry, and
-toggle a curve's visibility by clicking on its legend entry.
-
-#+begin_src sqlite :results value file :file top_agent_traffic.csv :exports none
-SELECT
- date / 14400 * 14400 AS time, -- 4h bin
- count(*) AS requests,
- total (length) FILTER (WHERE agentid = 143) AS "OpenAI-GPTBot",
- total (length) FILTER (WHERE agentid = 1) AS "ClaudeBot",
- total (length) FILTER (WHERE agentid = 19) AS "Barkrowler",
- total (length) FILTER (WHERE agentid = 6) AS "Macintosh",
- total (length) FILTER (WHERE agentid = 4602) AS "Scrapy",
- total (length) FILTER (WHERE agentid = 3) AS "PetalBot",
- total (length) FILTER (WHERE agentid = 2) AS "AhrefsBot",
- total (length) FILTER (WHERE agentid = 4) AS "Bytespider",
- total (length) FILTER (WHERE agentid = 10306) AS "SeekportBot",
- total (length) FILTER (WHERE agentid = 9) AS "Google",
- total (length) FILTER (WHERE agentid not in (143, 1, 19, 6, 4602, 3, 2, 4, 10306, 9)) AS "Rest"
-FROM
- logs
-WHERE
- path NOT LIKE '/ingrid/%'
- AND date NOT NULL
-GROUP BY
- time
-#+end_src
-
-#+RESULTS[2ec6d40ab4f5a844bdbd855884f8d0a6346fd780]:
-[[file:top_agent_traffic.csv]]
-
-#+attr_html: :id agent-traffic
-#+CAPTION: Load on the CGit frontend service by each visiting agent. The black dashed line shows the total requests at the server and uses the right-hand axis. All other solid-filled lines use the left-hand axis and represent bandwidth usage.
-#+NAME: fig:agent-traffic
-[[./jsfill.png]]
-
-You can see how aggressively =ClaudeBot= scrapes pages, using a lot of
-bandwidth in a /short/ time. =OpenAI-GPTBot=, on the other hand, seems
-rate-limited, because it spreads its scraping over a /longer/ period of time.
-However, as seen in table [[top-users]], it performs more than twice as many
-requests and consumes =30%= more bandwidth.
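-
-One way to put a number on this burstiness is to compare each bot's peak 4-hour
-window against its average one. This is only a rough sketch, reusing the 4-hour
-binning and agent ids from the data behind the plot:
-
-#+begin_src sqlite
-WITH binned AS (
-  SELECT
-    date / 14400 * 14400 AS bin,  -- same 4h bins as the plot
-    agentid,
-    total (length) / 1024 / 1024 AS mib
-  FROM logs
-  WHERE path NOT LIKE '/ingrid/%' AND date NOT NULL
-  GROUP BY bin, agentid
-)
-SELECT
-  CASE agentid WHEN 143 THEN 'OpenAI-GPTBot' ELSE 'ClaudeBot' END AS Agent,
-  round(max(mib), 1) AS "Peak MiB/4h",
-  round(avg(mib), 1) AS "Mean MiB/4h",
-  round(max(mib) / avg(mib), 1) AS "Peak/Mean"
-FROM binned
-WHERE agentid IN (143, 1)
-GROUP BY agentid
-#+end_src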
-
-The rest of the visitors are bots too. =Barkrowler= is a regular visitor
-gathering metrics for online marketing. =AhrefsBot= is of the same type, yet it
-only started crawling in March. =Macintosh= is almost certainly a bot disguising
-itself as a browser and constantly probing. =Scrapy=, also unknown, came at the
-start of the year and never came back.
-
-=PetalBot= feeds =Huawei='s search engine with AI recommendations; it lingers
-and slowly scrapes everything. =Seekport= is a search engine; it came all of a
-sudden, took as much as it found useful, =<1%= of what the big AI bots take, and
-swiftly left again.
-
-=Bytespider= is almost background noise, but it too exists to train an LLM,
-this time for =ByteDance=, the Chinese owner of =TikTok=.
-
-The last one, =Google=, doesn't even seem to be the bot that indexes its search
-engine, but rather one that tests how its =Chrome= browser renders pages.
-
-=Rest= is all the remaining /robots/ or /users/. Together they have consumed
-around =400MiB=, placing them, in aggregate, in the same league as =Macintosh=,
-=Scrapy=, =PetalBot= & =AhrefsBot=. Most of them are hacker bots probing the
-site. This also means that =~400MiB= is about what it takes to crawl the whole
-site. AI crawlers siphoning *10X* that amount is abusive.
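-
-That *10X* claim can be checked directly. A minimal sketch, using the same
-agent ids as above and treating everything outside the top-10 list as the
-baseline:
-
-#+begin_src sqlite
-SELECT
-  round(total (length) FILTER (WHERE agentid = 143) / 1024 / 1024, 1) AS "GPTBot MiB",
-  round(total (length) FILTER (WHERE agentid = 1) / 1024 / 1024, 1) AS "ClaudeBot MiB",
-  round(total (length) FILTER (WHERE agentid NOT IN (143, 1, 19, 6, 4602, 3, 2, 4, 10306, 9)) / 1024 / 1024, 1) AS "Rest MiB"
-FROM logs
-WHERE path NOT LIKE '/ingrid/%'
-#+end_src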
-
-* How should they visit?
-=CGit= is a web interface for =git= repositories. You can browse some of my
-code, file by file, and that is it. If you want *everything*, the correct way to
-use this service is through the =git= client, cloning my publicly available
-software.
-
-That would also make the data a lot more useful, even for those AI companies,
-because the data cleanup would be easier. They, of all companies, should use
-their AI to recognize what kind of page they are visiting and act accordingly,
-instead of stupidly scraping everything.
-
-How have the good citizens behaved? That is shown in table [[git-users]]. The
-=Software Heritage= project keeps mirrors of git repositories: it watches for
-updates and then downloads them. Other people downloaded as well, but in total
-they all pulled only =≈21MiB=. That is =0.3%= of what =ClaudeBot= consumed (a
-small query reproducing that figure follows the table).
-
-#+begin_src sqlite :exports results
-SELECT
- --agent.id,
- user_agent as "User Agent",
- count(*) AS hits,
- round(total (length) / 1024, 1) AS "tx KiB"
-FROM
- logs
- JOIN agent ON logs.agentid = agent.id
-WHERE
- user_agent LIKE '%git%'
- and id != 10897
-GROUP BY
- user_agent
-order by total(length) desc
-#+end_src
-
-#+name: git-users
-#+caption: Git users
-#+RESULTS[333fcbc738819c497f14b4445a3b45f391f0db7e]:
-| User Agent | hits | tx KiB |
-|----------------------------------------------------------------------------------+------+---------|
-| git/2.40.3 | 1075 | 12821.3 |
-| git/2.34.1 | 1149 | 3687.1 |
-| Software Heritage dumb Git loader | 337 | 2533.6 |
-| git/2.48.1 | 115 | 1908.6 |
-| Software Heritage cgit lister v6.9.3 (+https://www.softwareheritage.org/contact) | 8 | 21.3 |
-| Software Heritage cgit lister v6.9.2 (+https://www.softwareheritage.org/contact) | 8 | 21.0 |
-| git/dulwich/0.22.7 | 2 | 8.5 |
-| git/dulwich/0.22.6 | 1 | 4.2 |
-|----------------------------------------------------------------------------------+------+---------|
-| Total | 2695 | 21005.6 |
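-
-The =0.3%= figure quoted before the table can be reproduced directly. A minimal
-sketch, reusing the =git= user-agent filter and exclusion from the query above
-and ClaudeBot's agent id =1=:
-
-#+begin_src sqlite
-SELECT
-  round(100.0
-    * (SELECT total (length) FROM logs JOIN agent ON logs.agentid = agent.id
-       WHERE user_agent LIKE '%git%' AND id != 10897)
-    / (SELECT total (length) FROM logs WHERE agentid = 1), 2) AS "git clients vs ClaudeBot, %"
-#+end_src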
-
-* What are they looking at?
-The web front end of my git repositories, of course, but is there a pattern?
-
-Table [[status-codes]] shows the status codes of all requests performed by the
-users. The failure rate of =OpenAI= is alarming. Of its =3.5 million= requests,
-=15%= are client errors (=404= page not found), and those alone consume about
-=2GiB= of bandwidth. What is their scraper doing so wrong? =ClaudeBot=, as noted
-earlier, manages to scrape the site with half the requests and an error rate of
-only =1.6%=.
-
-=Everybody else= covers all the remaining users. They do have an error rate of
-=25%=, but that is normal, as they are generally hacker robots scanning for
-vulnerabilities. You are always under attack on the internet.
-
-#+begin_src sqlite :exports results
-SELECT
- CASE WHEN agentid = 143 THEN
- 'OpenAI-GPTBot'
- WHEN agentid = 1 THEN
- 'ClaudeBot'
- WHEN agentid = 19 THEN
- 'Barkrowler'
- WHEN agentid = 6 THEN
- 'Macintosh'
- WHEN agentid = 4602 THEN
- 'Scrapy'
- WHEN agentid = 3 THEN
- 'PetalBot'
- WHEN agentid = 2 THEN
- 'AhrefsBot'
- WHEN agentid = 4 THEN
- 'Bytespider'
- WHEN agentid = 10306 THEN
- 'SeekportBot'
- WHEN agentid = 9 THEN
- 'Google'
- ELSE
- 'Everybody else'
- END AS Agent,
- --count(*) AS "Requests",
- count(*) FILTER (WHERE status BETWEEN 200 AND 299) AS "2XX",
- count(*) FILTER (WHERE status BETWEEN 300 AND 399) AS "3XX",
- count(*) FILTER (WHERE status BETWEEN 400 AND 499) AS "4XX",
- count(*) FILTER (WHERE status BETWEEN 500 AND 599) AS "5XX",
- round((total(length) FILTER (WHERE status BETWEEN 400 AND 499))/1024/1024, 2) AS "4XX MiB",
- round((100.0 * count(*) FILTER (WHERE status BETWEEN 400 AND 499)) / count(*), 2) AS "4XX %"
-FROM
- logs
-GROUP BY
- Agent
-ORDER BY
- "4XX" DESC
-#+end_src
-
-#+name: status-codes
-#+caption: HTTP status codes per user agent
-#+RESULTS[b4402559f97ad9a4f1ec20091284651af575ffeb]:
-| Agent | 2XX | 3XX | 4XX | 5XX | 4XX MiB | 4XX % |
-|----------------+---------+-----+--------+-----+---------+-------|
-| / | < | | | | < | |
-| OpenAI-GPTBot | 3017848 | 0 | 554511 | 121 | 2060.23 | 15.52 |
-| Everybody else | 99066 | 467 | 34630 | 14 | 101.96 | 25.81 |
-| ClaudeBot | 1591179 | 26 | 25611 | 446 | 162.67 | 1.58 |
-| Barkrowler | 272343 | 0 | 1618 | 7 | 5.35 | 0.59 |
-| Macintosh | 79071 | 2 | 1086 | 0 | 7.87 | 1.35 |
-| Bytespider | 13609 | 0 | 531 | 2 | 3.94 | 3.75 |
-| PetalBot | 69223 | 0 | 473 | 1 | 3.2 | 0.68 |
-| Scrapy | 207240 | 0 | 348 | 183 | 1.14 | 0.17 |
-| AhrefsBot | 59733 | 0 | 90 | 9 | 0.61 | 0.15 |
-| Google | 3576 | 0 | 2 | 0 | 0.02 | 0.06 |
-| SeekportBot | 2500 | 0 | 0 | 0 | 0.0 | 0.0 |
-
-Let's have a look at the most frequently missing pages. Table [[fail-pages]] lists
-each page path, how much bandwidth (=tx=) it consumed, and the number of
-requests per bot. With one exception, all pages are placeholder links used in
-website theme templates. The repository =hugo-minimalist-theme= is a [[https://gohugo.io][Hugo]] theme.
-Within the curly braces ={{ }}= the rendering engine substitutes values.
-Evidently the crawlers' HTML parsers read these placeholders raw from the =a=
-link tags and request them as pages. =ClaudeBot= seems to keep track of error
-pages and not query them again. =OpenAI= is incapable of doing that, and
-stubbornly tries over and over.
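-
-The difference in retry behaviour can be made visible by counting how often
-each bot hits the same missing path. A minimal sketch over the same =logs=
-table:
-
-#+begin_src sqlite
-SELECT
-  CASE agentid WHEN 143 THEN 'OpenAI-GPTBot' ELSE 'ClaudeBot' END AS Agent,
-  count(DISTINCT path) AS "Distinct 404 paths",
-  count(*) AS "404 requests",
-  round(1.0 * count(*) / count(DISTINCT path), 1) AS "Requests per path"
-FROM logs
-WHERE status = 404
-  AND agentid IN (143, 1)
-  AND path NOT LIKE '/ingrid/%'
-GROUP BY agentid
-#+end_src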
-
-If you grep for the string /href="{{ .RelPermalink }}"/ over the entire git
-history of that repository, you find it appears =954= times up to today. It is
-surprising and annoying that =OpenAI= manages to request it triple that amount.
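-
-That count can also be pulled straight from the logs. A sketch; the =LIKE=
-pattern matches the URL-encoded placeholder anywhere in the path, so it is only
-an approximation:
-
-#+begin_src sqlite
-SELECT count(*) AS "GPTBot 404s on {{ .RelPermalink }}"
-FROM logs
-WHERE agentid = 143
-  AND status = 404
-  AND path LIKE '%RelPermalink%'
-#+end_src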
-
-#+begin_src sqlite :exports results
-SELECT
- replace(replace(replace(replace(path, '%7B', '{'), '%7D', '}'), '|', '\vert'), '%20', ' ') AS Page,
- round(total (length) / 1024 / 1024, 2) AS "tx MiB",
- count(*) FILTER (WHERE agentid = 143) AS "OpenAI",
- count(*) FILTER (WHERE agentid = 1) AS "ClaudeBot",
- count(*) FILTER (WHERE agentid NOT IN (1, 143)) AS "Rest"
- --substr(path, 0, 50)
-FROM
- logs
-WHERE
- path NOT LIKE '/ingrid/%'
- AND status = 404
-GROUP BY
- path
-ORDER BY
- 2 DESC
-LIMIT 10
-#+end_src
-
-#+name: fail-pages
-#+caption: Top 10 =404= not-found pages.
-#+RESULTS[19605bfdef59599b47ed8f0e4b3bce71daaca7d3]:
-| Page | tx MiB | OpenAI | ClaudeBot | Rest |
-|---------------------------------------------------------------------------+--------+--------+-----------+------|
-| /hugo-minimalist-theme/plain/layouts/partials/{{ .RelPermalink }} | 8.36 | 2805 | 3 | 7 |
-| /hugo-minimalist-theme/plain/layouts/partials/{{ .URL }} | 5.39 | 1629 | 1 | 13 |
-| /hugo-minimalist-theme/plain/layouts/partials/{{ . }} | 4.82 | 1559 | 1 | 4 |
-| /hugo-minimalist-theme/plain/layouts/partials/{{ $href }} | 4.28 | 1209 | 4 | 5 |
-| /.env | 3.84 | 0 | 0 | 744 |
-| /hugo-minimalist-theme/plain/layouts/partials/{{ .Permalink }} | 3.75 | 1060 | 2 | 15 |
-| /hugo-minimalist-theme/plain/layouts/partials/{{ $pag.Next.URL }} | 3.36 | 916 | 1 | 7 |
-| /hugo-minimalist-theme/plain/layouts/partials/{{ $pag.Prev.URL }} | 3.34 | 912 | 0 | 7 |
-| /hugo-minimalist-theme/plain/layouts/partials/{{ if ne .MediaType.SubType | 2.95 | 817 | 1 | 0 |
-| /hugo-minimalist-theme/plain/layouts/taxonomy/{{ .Name \vert urlize }} | 2.86 | 745 | 5 | 0 |
-
-What about the hackers? Table [[hacker-attacks]] excludes the AI bots to expose
-the attack surface. First come malformed requests producing a =400 Bad Request=
-on the main site. Next are attempts to steal environment secrets from the
-=.env= file or from the git configuration. The most common type of attack then
-aims to exploit the remote code execution vulnerability in =PHPUnit= by looking
-for the file =eval-stdin.php=.
-
-#+begin_src sqlite :exports results
-SELECT
- round(total (length) / 1024 / 1024, 2) AS "Tx MiB",
- count(*) Requests,
- count(DISTINCT agentid) Agents,
- count(DISTINCT ipid) IPs,
- group_concat (DISTINCT status) AS "Errors",
- group_concat (DISTINCT request_method) AS "Methods",
- replace(path, '_', '\under{}') AS path
-FROM
- logs
-WHERE
- path NOT LIKE '/ingrid/%'
- AND status >= 300
- AND agentid NOT IN (1, 143)
-GROUP BY
- path
- --, status
-ORDER BY
- --sum(count(*)) OVER (PARTITION BY path) DESC,
- Requests DESC
-LIMIT 10
-#+end_src
-
-#+name: hacker-attacks
-#+caption: Top 10 attacks leading to error pages, ranked by number of requests. The /Agents/ and /IPs/ columns count the distinct user agents and IP addresses making the requests.
-#+RESULTS[5d9125d3f545af74d94789c0893d257619926437]:
-| Tx MiB | Requests | Agents | IPs | Errors | Methods | path |
-|--------+----------+--------+------+-------------+----------+----------------------------------------------------------------------------------------------------------|
-| 3.17 | 3482 | 6 | 1139 | 400,421,408 | GET,POST | / |
-| 3.84 | 744 | 368 | 256 | 404 | GET,POST | /.env |
-| 2.26 | 409 | 1 | 11 | 404 | GET | /cgi-bin/luci/;stok=/locale |
-| 2.02 | 381 | 182 | 121 | 404 | GET | /.git/config |
-| 0.57 | 222 | 12 | 167 | 404 | GET,POST | /vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php |
-| 1.08 | 195 | 1 | 1 | 404 | GET | /actuator/gateway/routes |
-| 0.88 | 173 | 2 | 137 | 404 | POST | /hello.world?%ADd+allow\under{}url\under{}include%3d1+%ADd+auto\under{}prepend\under{}file%3dphp://input |
-| 0.23 | 157 | 2 | 127 | 404 | GET | /vendor/phpunit/phpunit/Util/PHP/eval-stdin.php |
-| 0.22 | 152 | 2 | 123 | 404 | GET | /vendor/phpunit/src/Util/PHP/eval-stdin.php |
-| 0.22 | 148 | 2 | 119 | 404 | GET | /vendor/phpunit/phpunit/LICENSE/eval-stdin.php |
-
-* Future plans
-Many webmasters have been annoyed by this abusive scraping by AI bots. The
-project [[https://xeiaso.net/blog/2025/anubis/][Anubis]] imposes a proof-of-work /tax/ on visitors to a webpage. This
-way, abusive scraping by AI bots is reduced.
-
-I personally dislike that idea. It does create an extra expense for the AI
-companies that indiscriminately crawl the internet, but no one really wins. It
-is a failure of our internet ecosystem that micropayments aren't yet a reality.
-For me, this means being part of the change and bringing my Bitcoin Lightning
-tipping system back online, this time with real coins. We need to get people
-used to paying for resources on the internet. For that we need a working
-infrastructure; we can't wait for the banking system to provide it. In my
-opinion, the main reason our internet so aggressively invades our privacy is
-that the banking system never provided a way to move money across the internet.
-The only people who could pay were advertising companies.
-
-Knowing how stupid the AI crawlers are, I believe poisoning the training data
-is a better cure than a proof-of-work tax for such aggressive and mindless
-crawling by AI companies. Projects like [[https://iocaine.madhouse-project.org/][Iocaine]] provide a way to do it, and
-that is what I'll implement in the future.
-
-#+begin_export html
-<script type="text/javascript">
- addEventListener("load", () => {
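-    // Transpose parsed CSV rows into per-column float arrays (the column-major layout uPlot expects).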
- function csvFloat(data) {
- let headers = data[0];
- let series = headers.map((_, idx) =>
- data.slice(1).map((row) => parseFloat(row[idx])),
- );
- return [headers, series];
- }
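-    // Split a CSV HTTP response into rows of string cells; reject on non-2XX status.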
- function responseParseCSV(response) {
- if (response.ok)
- return response.text().then((data) =>
- data
- .split(/\n/)
- .filter((x) => x)
- .map((row) => row.split(/,/)),
- );
- throw new Error("not 2XX resp");
- }
-    function withSuffix(val, suffix) {
-      // Trim a trailing ".0" (from toFixed) so labels read "5G" rather than "5.0G".
-      return val.toFixed(1).replace(/\.?0+$/, "").concat(suffix);
-    }
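-    // Choose an SI prefix (p…P) and rescale the value so axis labels stay short; returns [scaled, prefix].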
- function siScaling(value) {
- var v = Math.abs(value);
- return 0 === v
- ? [0, ""]
- : v >= 1000000000000000.0
- ? [value / 1000000000000000.0, "P"]
- : v >= 1000000000000.0
- ? [value / 1000000000000.0, "T"]
- : v >= 1000000000.0
- ? [value / 1000000000.0, "G"]
- : v >= 1000000.0
- ? [value / 1000000.0, "M"]
- : v >= 1000.0
- ? [value / 1000.0, "K"]
- : v >= 0.6
- ? [value, ""]
- : v >= 0.001
- ? [value / 0.001, "m"]
- : v >= 0.000001
- ? [value / 0.000001, "μ"]
- : v >= 0.000000001
- ? [value / 0.000000001, "n"]
- : v >= 0.000000000001
- ? [value / 0.000000000001, "p"]
- : null;
- }
- function scaling(val, suffix) {
- return withSuffix.apply(this, siScaling(val));
- }
-
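-    // Spread series colors around the hue wheel by stepping the golden angle (≈137.5°) per index.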
- function spacedColor(idx, alpha) {
- if (alpha === undefined) {
- alpha = "/ 1";
- }
- return "hsl(" + 137.506 * idx + " 70% 55% " + alpha + ")";
- }
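-    // Build the uPlot chart: per-agent bandwidth on the left axis, total requests as a dashed black line on the right.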
- function agentChart(header, series, container) {
- const opts = {
- width: 920,
- height: 600,
- hooks: {
- setSeries: [
- (u, seriesIdx, opts) => {
- if (opts.focus != null) {
- u.series.forEach((s, i) => {
- s.width = i == seriesIdx ? 3 : 1;
- });
- }
- },
- ],
- },
- focus: { alpha: 0.5 },
- cursor: {
- focus: {
- prox: 1e6,
- bias: 0,
- dist: (self, seriesIdx, dataIdx, valPos, curPos) => {
- return valPos - curPos;
- },
- },
- },
-
- series: [
- {},
- {
- label: header[1],
- stroke: "black",
- dash: [10, 5],
- value: (u, v) => (v ? scaling(v) : v),
- scale: "hits",
- },
- ].concat(
- header.slice(2).map((name, idx) => ({
- label: name,
- stroke: spacedColor(idx),
- fill: spacedColor(idx, "/ 0.1"),
- value: (u, v) => (v ? scaling(v) + "B" : v),
- })),
- ),
- axes: [
- {},
- {
- values: (u, vals, space) =>
- vals.map((v) => (v ? scaling(v) + "B" : v)),
- size: 60,
- label: "Bandwidth",
- labelSize: 50,
- },
- {
- side: 1,
- scale: "hits",
- label: "Requests",
- grid: { show: false },
- values: (u, vals, space) => vals.map((v) => (v ? scaling(v) : v)),
- labelSize: 50,
- },
- ],
- };
- let uplot = new uPlot(opts, series, container);
- container["uobj"] = uplot;
- }
-
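-    // Load the CSV produced above and swap the placeholder image (fig:agent-traffic) for the live chart.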
- fetch("/top_agent_traffic.csv")
- .then(responseParseCSV)
- .then(csvFloat)
- .then(([headers, series]) => {
- let cont = document.querySelector("#agent-traffic").parentNode;
- cont.innerHTML = "";
- agentChart(headers, series, cont);
- });
- });
-</script>
-#+end_export