<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="it">
	<id>https://wiki.mindmaker.it/index.php?action=history&amp;feed=atom&amp;title=Deep_Reinforcement_Learning_from_Human_Preferences</id>
	<title>Deep Reinforcement Learning from Human Preferences - Cronologia</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.mindmaker.it/index.php?action=history&amp;feed=atom&amp;title=Deep_Reinforcement_Learning_from_Human_Preferences"/>
	<link rel="alternate" type="text/html" href="https://wiki.mindmaker.it/index.php?title=Deep_Reinforcement_Learning_from_Human_Preferences&amp;action=history"/>
	<updated>2026-05-14T07:13:20Z</updated>
	<subtitle>Cronologia della pagina su questo sito</subtitle>
	<generator>MediaWiki 1.41.0</generator>
	<entry>
		<id>https://wiki.mindmaker.it/index.php?title=Deep_Reinforcement_Learning_from_Human_Preferences&amp;diff=2510&amp;oldid=prev</id>
		<title>Mindmakerbot il 10:41, 17 ago 2024</title>
		<link rel="alternate" type="text/html" href="https://wiki.mindmaker.it/index.php?title=Deep_Reinforcement_Learning_from_Human_Preferences&amp;diff=2510&amp;oldid=prev"/>
		<updated>2024-08-17T10:41:41Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;it&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Versione meno recente&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Versione delle 10:41, 17 ago 2024&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l1&quot;&gt;Riga 1:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Riga 1:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Titolo&lt;/del&gt;: &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;[[Titolo::Deep Reinforcement Learning from Human Preferences]]&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;{{template pubblicazione&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;|data=2023&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;|autori=Paul F Christiano, Jan Leike, Tom B Brown, Miljan Martic, Shane Legg, Dario Amodei&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;|URL=https&lt;/ins&gt;:&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;//www.semanticscholar.org/paper/5bbb6f9a8204eb13070b6f033e61c84ef8ee68dd&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;|topic=Metodo alternativo che utilizza le preferenze umane come guida per l&#039;apprendimento&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;|citazioni=2215&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;}}&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Anno &lt;/del&gt;di &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;pubblicazione: [[AnnoDiPubblicazione::2023]]&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Questo articolo esplora l&#039;utilizzo delle preferenze umane come metodo per addestrare sistemi &lt;/ins&gt;di &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;apprendimento per rinforzo (RL) complessi&lt;/ins&gt;. &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Invece di affidarsi a una funzione di ricompensa esplicita, il sistema impara dalle preferenze umane espresse tra coppie di segmenti di traiettoria&lt;/ins&gt;. &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Questo approccio si è dimostrato efficace in attività complesse come i giochi Atari e la locomozione di robot simulati, richiedendo feedback umano solo su una piccola percentuale di interazioni dell&#039;agente con l&#039;ambiente&lt;/ins&gt;. &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;La flessibilità di questo metodo è ulteriormente dimostrata dalla sua capacità di addestrare comportamenti nuovi e complessi con circa un&#039;ora di supervisione umana. I risultati suggeriscono &lt;/ins&gt;che &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;l&#039;apprendimento dalle &lt;/ins&gt;preferenze umane &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;può essere una valida alternativa &lt;/ins&gt;per l&#039;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;addestramento di sistemi di RL in scenari del mondo reale in cui la definizione di una funzione di ricompensa esplicita è difficile o costosa.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Autori: [[Autori::Paul F Christiano]]; [[Autori::Jan Leike]]; [[Autori::Tom B Brown]]; [[Autori::Miljan Martic]]; [[Autori::Shane Legg]]; [[Autori::Dario Amodei]]&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;URL: [[URL::https://arxiv&lt;/del&gt;.&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;org/pdf/1706&lt;/del&gt;.&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;03741&lt;/del&gt;.&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;pdf]]&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Topic: [[Topic::Metodo alternativo &lt;/del&gt;che &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;utilizza le &lt;/del&gt;preferenze umane &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;come guida &lt;/del&gt;per l&#039;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;apprendimento]]&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;[[Category:pubblicazione]]&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;__SHOWFACTBOX__&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-added&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Mindmakerbot</name></author>
	</entry>
	<entry>
		<id>https://wiki.mindmaker.it/index.php?title=Deep_Reinforcement_Learning_from_Human_Preferences&amp;diff=1879&amp;oldid=prev</id>
		<title>Sara Maserati il 10:49, 9 apr 2024</title>
		<link rel="alternate" type="text/html" href="https://wiki.mindmaker.it/index.php?title=Deep_Reinforcement_Learning_from_Human_Preferences&amp;diff=1879&amp;oldid=prev"/>
		<updated>2024-04-09T10:49:51Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;it&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Versione meno recente&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Versione delle 10:49, 9 apr 2024&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l8&quot;&gt;Riga 8:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Riga 8:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Topic: [[Topic::Metodo alternativo che utilizza le preferenze umane come guida per l&amp;#039;apprendimento]]&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Topic: [[Topic::Metodo alternativo che utilizza le preferenze umane come guida per l&amp;#039;apprendimento]]&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;[[Category:pubblicazione]]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;__SHOWFACTBOX__&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;__SHOWFACTBOX__&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Sara Maserati</name></author>
	</entry>
	<entry>
		<id>https://wiki.mindmaker.it/index.php?title=Deep_Reinforcement_Learning_from_Human_Preferences&amp;diff=1864&amp;oldid=prev</id>
		<title>Sara Maserati: Creata pagina con &quot;Titolo: Titolo::Deep Reinforcement Learning from Human Preferences  Anno di pubblicazione: AnnoDiPubblicazione::2023  Autori: Autori::Paul F Christiano; Autori::Jan Leike; Autori::Tom B Brown; Autori::Miljan Martic; Autori::Shane Legg; Autori::Dario Amodei  URL: URL::https://arxiv.org/pdf/1706.03741.pdf  Topic: Topic::Metodo alternativo che utilizza le preferenze umane come guida per l&#039;apprendimento  __SHOWFACTBOX__&quot;</title>
		<link rel="alternate" type="text/html" href="https://wiki.mindmaker.it/index.php?title=Deep_Reinforcement_Learning_from_Human_Preferences&amp;diff=1864&amp;oldid=prev"/>
		<updated>2024-04-09T09:53:44Z</updated>

		<summary type="html">&lt;p&gt;Creata pagina con &amp;quot;Titolo: &lt;a href=&quot;/index.php?title=Titolo::Deep_Reinforcement_Learning_from_Human_Preferences&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;Titolo::Deep Reinforcement Learning from Human Preferences (la pagina non esiste)&quot;&gt;Titolo::Deep Reinforcement Learning from Human Preferences&lt;/a&gt;  Anno di pubblicazione: &lt;a href=&quot;/index.php?title=AnnoDiPubblicazione::2023&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;AnnoDiPubblicazione::2023 (la pagina non esiste)&quot;&gt;AnnoDiPubblicazione::2023&lt;/a&gt;  Autori: &lt;a href=&quot;/index.php?title=Autori::Paul_F_Christiano&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;Autori::Paul F Christiano (la pagina non esiste)&quot;&gt;Autori::Paul F Christiano&lt;/a&gt;; &lt;a href=&quot;/index.php?title=Autori::Jan_Leike&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;Autori::Jan Leike (la pagina non esiste)&quot;&gt;Autori::Jan Leike&lt;/a&gt;; &lt;a href=&quot;/index.php?title=Autori::Tom_B_Brown&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;Autori::Tom B Brown (la pagina non esiste)&quot;&gt;Autori::Tom B Brown&lt;/a&gt;; &lt;a href=&quot;/index.php?title=Autori::Miljan_Martic&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;Autori::Miljan Martic (la pagina non esiste)&quot;&gt;Autori::Miljan Martic&lt;/a&gt;; &lt;a href=&quot;/index.php?title=Autori::Shane_Legg&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;Autori::Shane Legg (la pagina non esiste)&quot;&gt;Autori::Shane Legg&lt;/a&gt;; &lt;a href=&quot;/index.php?title=Autori::Dario_Amodei&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;Autori::Dario Amodei (la pagina non esiste)&quot;&gt;Autori::Dario Amodei&lt;/a&gt;  URL: &lt;a href=&quot;/index.php?title=URL::https://arxiv.org/pdf/1706.03741.pdf&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;URL::https://arxiv.org/pdf/1706.03741.pdf (la pagina non esiste)&quot;&gt;URL::https://arxiv.org/pdf/1706.03741.pdf&lt;/a&gt;  Topic: &lt;a href=&quot;/index.php?title=Topic::Metodo_alternativo_che_utilizza_le_preferenze_umane_come_guida_per_l%27apprendimento&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;Topic::Metodo alternativo che utilizza le preferenze umane come guida per l&amp;#039;apprendimento (la pagina non esiste)&quot;&gt;Topic::Metodo alternativo che utilizza le preferenze umane come guida per l&amp;#039;apprendimento&lt;/a&gt;  __SHOWFACTBOX__&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;Nuova pagina&lt;/b&gt;&lt;/p&gt;&lt;div&gt;Titolo: [[Titolo::Deep Reinforcement Learning from Human Preferences]]&lt;br /&gt;
&lt;br /&gt;
Anno di pubblicazione: [[AnnoDiPubblicazione::2023]]&lt;br /&gt;
&lt;br /&gt;
Autori: [[Autori::Paul F Christiano]]; [[Autori::Jan Leike]]; [[Autori::Tom B Brown]]; [[Autori::Miljan Martic]]; [[Autori::Shane Legg]]; [[Autori::Dario Amodei]]&lt;br /&gt;
&lt;br /&gt;
URL: [[URL::https://arxiv.org/pdf/1706.03741.pdf]]&lt;br /&gt;
&lt;br /&gt;
Topic: [[Topic::Metodo alternativo che utilizza le preferenze umane come guida per l&amp;#039;apprendimento]]&lt;br /&gt;
&lt;br /&gt;
__SHOWFACTBOX__&lt;/div&gt;</summary>
		<author><name>Sara Maserati</name></author>
	</entry>
</feed>