Open Problems and Fundamental Limitations of Reinforcement Learning from Human Feedback.
Resource URI: https://dblp.l3s.de/d2r/resource/publications/journals/corr/abs-2307-15217
Home
|
Example Publications
Property
Value
dcterms:
bibliographicCitation
<
http://dblp.uni-trier.de/rec/bibtex/journals/corr/abs-2307-15217
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Anand_Siththaranjan
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Anca_D._Dragan
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Andi_Peng
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Charbel-Rapha%E2%88%9A%C4%99l_S%E2%88%9A%C2%A9gerie
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Claudia_Shi
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/David_Krueger
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/David_Lindner
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Dmitrii_Krasheninnikov
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Dorsa_Sadigh
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Dylan_Hadfield-Menell
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Erdem_Biyik
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Eric_J._Michaud
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/J%E2%88%9A%C2%A9r%E2%88%9A%C2%A9my_Scheurer
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Jacob_Pfau
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Javier_Rando
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Lauro_Langosco
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Max_Nadeau
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Mehul_Damani
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Micah_Carroll
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Pedro_Freire
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Peter_Hase
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Phillip_J._K._Christoffersen
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Rachel_Freedman
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Samuel_Marks
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Stephen_Casper
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Stewart_Slocum
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Thomas_Krendl_Gilbert
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Tomasz_Korbak
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Tony_Wang
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Usman_Anwar
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Xander_Davies
>
dc:
creator
<
https://dblp.l3s.de/d2r/resource/authors/Xin_Chen
>
foaf:
homepage
<
http://dx.doi.org/doi.org%2F10.48550%2FarXiv.2307.15217
>
foaf:
homepage
<
https://doi.org/10.48550/arXiv.2307.15217
>
dc:
identifier
DBLP journals/corr/abs-2307-15217
(xsd:string)
dc:
identifier
DOI doi.org%2F10.48550%2FarXiv.2307.15217
(xsd:string)
dcterms:
issued
2023
(xsd:gYear)
swrc:
journal
<
https://dblp.l3s.de/d2r/resource/journals/corr
>
rdfs:
label
Open Problems and Fundamental Limitations of Reinforcement Learning from Human Feedback.
(xsd:string)
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Anand_Siththaranjan
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Anca_D._Dragan
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Andi_Peng
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Charbel-Rapha%E2%88%9A%C4%99l_S%E2%88%9A%C2%A9gerie
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Claudia_Shi
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/David_Krueger
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/David_Lindner
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Dmitrii_Krasheninnikov
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Dorsa_Sadigh
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Dylan_Hadfield-Menell
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Erdem_Biyik
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Eric_J._Michaud
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/J%E2%88%9A%C2%A9r%E2%88%9A%C2%A9my_Scheurer
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Jacob_Pfau
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Javier_Rando
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Lauro_Langosco
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Max_Nadeau
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Mehul_Damani
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Micah_Carroll
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Pedro_Freire
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Peter_Hase
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Phillip_J._K._Christoffersen
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Rachel_Freedman
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Samuel_Marks
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Stephen_Casper
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Stewart_Slocum
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Thomas_Krendl_Gilbert
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Tomasz_Korbak
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Tony_Wang
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Usman_Anwar
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Xander_Davies
>
foaf:
maker
<
https://dblp.l3s.de/d2r/resource/authors/Xin_Chen
>
owl:
sameAs
<
http://bibsonomy.org/uri/bibtexkey/journals/corr/abs-2307-15217/dblp
>
owl:
sameAs
<
http://dblp.rkbexplorer.com/id/journals/corr/abs-2307-15217
>
rdfs:
seeAlso
<
http://dblp.uni-trier.de/db/journals/corr/corr2307.html#abs-2307-15217
>
rdfs:
seeAlso
<
https://doi.org/10.48550/arXiv.2307.15217
>
dc:
title
Open Problems and Fundamental Limitations of Reinforcement Learning from Human Feedback.
(xsd:string)
dc:
type
<
http://purl.org/dc/dcmitype/Text
>
rdf:
type
swrc:Article
rdf:
type
foaf:Document
swrc:
volume
abs/2307.15217
(xsd:string)